diff --git a/Documentation/Doxygen/dsp.dxy.in b/Documentation/Doxygen/dsp.dxy.in index 73c17573..c78d8871 100644 --- a/Documentation/Doxygen/dsp.dxy.in +++ b/Documentation/Doxygen/dsp.dxy.in @@ -573,14 +573,14 @@ HIDE_UNDOC_MEMBERS = YES # if EXTRACT_ALL is enabled. # The default value is: NO. -HIDE_UNDOC_CLASSES = NO +HIDE_UNDOC_CLASSES = YES # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # declarations. If set to NO, these declarations will be included in the # documentation. # The default value is: NO. -HIDE_FRIEND_COMPOUNDS = NO +HIDE_FRIEND_COMPOUNDS = YES # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO, these @@ -773,7 +773,7 @@ SHOW_FILES = YES # Folder Tree View (if specified). # The default value is: YES. -SHOW_NAMESPACES = YES +SHOW_NAMESPACES = NO # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from @@ -919,11 +919,24 @@ WARN_LOGFILE = # Note: If this tag is empty the current directory is searched. INPUT = ./src/mainpage.md \ + ./src/dsppp_main.md \ + ./src/introduction.md \ + ./src/template.md \ + ./src/guidelines.md \ + ./src/vectorop.md \ + ./src/memory_allocator.md \ + ./src/memory_static_dynamic.md \ + ./src/code_size.md \ + ./src/fusion.md \ + ./src/vector.md \ + ./src/matrix.md \ + ./src/building.md \ ./src/history.md \ ./src/history.txt \ ../../Examples/ARM \ ../../Include/ \ - ../../Source/ \ + ../../Source/ \ + ../../dsppp/Include # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses @@ -2417,7 +2430,7 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. 
-PREDEFINED = ARM_MATH_NEON=1 ARM_FLOAT16_SUPPORTED=1 __STATIC_FORCEINLINE= __ALIGNED(x)= +PREDEFINED = DOXYGEN HAS_VECTOR HAS_PREDICATED_LOOP ARM_MATH_NEON=1 ARM_FLOAT16_SUPPORTED=1 __STATIC_FORCEINLINE= __ALIGNED(x)= # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The diff --git a/Documentation/Doxygen/src/building.md b/Documentation/Doxygen/src/building.md new file mode 100644 index 00000000..0cdf6b41 --- /dev/null +++ b/Documentation/Doxygen/src/building.md @@ -0,0 +1,29 @@ +# Building and running examples {#dsppp_building} + +## To build + +First time: + +```shell +cbuild -O cprj test.csolution.yml --toolchain AC6 -c example.Release+VHT-Corstone-300 -p -r --update-rte + +``` + +Other times: + +```shell +cbuild -O cprj test.csolution.yml --toolchain AC6 -c example.Release+VHT-Corstone-300 +``` + +If you want to select another test, edit the file `example.cproject.yml` and uncomment the test. + +## To run + +If the tools have been installed with `vcpkg`: + +``` +FVP_Corstone_SSE-300_Ethos-U55.exe -f fvp_configs/VHT-Corstone-300.txt -a cpu0=cprj\out\example\VHT-Corstone-300\Release\example.axf +``` + +Otherwise, you'll need to use the path to your FVP installation. + diff --git a/Documentation/Doxygen/src/code_size.md b/Documentation/Doxygen/src/code_size.md new file mode 100644 index 00000000..f06cf696 --- /dev/null +++ b/Documentation/Doxygen/src/code_size.md @@ -0,0 +1,14 @@ +# Code size {#dsppp_code_size} + +It was explained in previous sections that types `Vector` and `Vector` are considered as different types if `NB1` and `NB2` are differents. + +A template algorithm is like a code generator that will generate different code for different values of the template arguments : the types. + +If you use a template algorithm with different vector datatypes, it will generate different code for those two datatypes. 
The generated code will be specialized for the specific datatypes used and thus is likely to be more efficient. + +But then it means you get different implementations so more code size. + +If you have a lot of different sizes in your system, then you're likely to get too much code size and in that case it may be better to use dynamic objects instead of static ones. + +dynamic objects are less efficient so it is a trade-off between code size / speed. + diff --git a/Documentation/Doxygen/src/dsppp_main.md b/Documentation/Doxygen/src/dsppp_main.md new file mode 100644 index 00000000..df5325db --- /dev/null +++ b/Documentation/Doxygen/src/dsppp_main.md @@ -0,0 +1,18 @@ +# DSP++ extension {#dsppp_main} + +C++ extensions to CMSIS-DSP using C++ template meta-programming (headers only). + +The headers are not yet part of the CMSIS-DSP pack since they are experimental. You can get them from the [CMSIS-DSP github](https://github.com/ARM-software/CMSIS-DSP/dsppp/Include). There is nothing to build. Just include the headers when you want to use this framework. 
+ +* @subpage dsppp_intro "Introduction" +* @subpage dsppp_template "C++ template for C programmer" +* @subpage dsppp_vector_example "Vector operation example" +* @subpage dsppp_memory_allocator "Memory allocation" +* @subpage dsppp_memory_static_dynamic "Static / Dynamic objects" +* @subpage dsppp_code_size "Code size" +* @subpage dsppp_fusion "Fusion mechanism" +* @subpage dsppp_vector "Vector operators" +* @subpage dsppp_matrix "Matrix operators" +* @subpage dsppp_building "Building and running examples" +* @subpage dsppp_guidelines "Usage guidelines" + diff --git a/Documentation/Doxygen/src/fusion.md b/Documentation/Doxygen/src/fusion.md new file mode 100644 index 00000000..cec2f4f8 --- /dev/null +++ b/Documentation/Doxygen/src/fusion.md @@ -0,0 +1,39 @@ +# Fusion {#dsppp_fusion} + +```cpp +Vector d = a + b * c; +``` + +With this line of code, there is loop fusion : instead of having one loop per operator there is one loop for the whole computation. + +It is important to have some idea of how it works to avoid some mistakes in the use of the library. + +In above code, `a + b * c` is not computing anything ! +`a + b * c` is creating a representation of the expression : an abstract syntax tree (AST) at build time. + +When this AST is assigned to the variable `d` it is evaluated. +The evaluation forces the inlining of the expression operators in one loop. The code generated thus contains only one loop with a fusion of all the operators : `+` and `*`. + +The library is supporting virtual vectors. They are a view on an existing part of a vector. You can use a virtual vector for instance to read some samples with a stride. Or write some samples with a stride. A virtual vector does not own its memory. + +If you write: +```cpp +d = a; +``` + +and `d` and `a` are virtual vectors then nothing will be written to `d` ! + +`d` will become `a` and `a` will no longer be valid. 
+ +If you want to copy a virtual vector you need to make an expression and write: + +```cpp +d = copy(a); +``` + +Note that this problem occurs only for virtual vectors who do not own their memory. + +For real vectors, a copy would occur. But since there is no overhead in adding `copy` it is better to do it to avoid problems. + + + diff --git a/Documentation/Doxygen/src/guidelines.md b/Documentation/Doxygen/src/guidelines.md new file mode 100644 index 00000000..bd816378 --- /dev/null +++ b/Documentation/Doxygen/src/guidelines.md @@ -0,0 +1 @@ +# Guidelines {#dsppp_guidelines} diff --git a/Documentation/Doxygen/src/introduction.md b/Documentation/Doxygen/src/introduction.md new file mode 100644 index 00000000..4814dab6 --- /dev/null +++ b/Documentation/Doxygen/src/introduction.md @@ -0,0 +1,64 @@ +## Introduction {#dsppp_intro} + +### Dot product example + +If you want to compute the dot product: + +\f[ + + + +\f] + +with CMSIS-DSP, you would write: + +```c +arm_add_f32(a,b,tmp1,NB); +arm_scale_f32(tmp1,scale,tmp2,NB); +arm_mult_f32(c,d,tmp3,NB); +arm_dot_prod_f32(tmp2,tmp3,NB,&r); +``` + +There are several limitations with this way of writing the code: + +1. The code needs to be rewritten and the `_f32` suffix changed if the developer wants to use another datatype + +2. Temporary buffers need to be allocated and managed (`tmp1`,`tmp2`,`tmp3`,`tmp4`) + +3. The four function calls are four different loops. It is not good for data locality and caches. The computation is not done in one pass + +4. Each loop contains a small number of instructions. For instance, for the `arm_add_f32`, two loads, an add instruction and a store. It is not enough to enable the compiler to reorder the instructions to improve the performance + +With this new C++ template library, you can write: + + +```cpp +r = dot(scale*(a+b),c*d); +``` + +The code generated by this line computes the dot product in one pass with all the operators (`+`, `*`) included in the loop. 
+There are no longer any temporary buffers. + +### Vector operations + +Let's look at another example: + +\f[ + +\overrightarrow{d} = \overrightarrow{a} + \overrightarrow{b} * \overrightarrow{c} + +\f] + +With the C++ library, it can be written as: + + +```cpp +Vector d = a + b * c; +``` + +Here again : all the vector operations (`+`,`*`) are done in one pass with one loop. There is no longer any temporary buffer. + +If you're coming from C and do not know anything about C++ templates, we have a very quick introduction : @ref dsppp_template "The minimum you need to know about C++ template to use this library". + +You can also jump directly to an @ref dsppp_vector_example "example with vector operations". + diff --git a/Documentation/Doxygen/src/mainpage.md b/Documentation/Doxygen/src/mainpage.md index f33a65b9..d081f0a3 100644 --- a/Documentation/Doxygen/src/mainpage.md +++ b/Documentation/Doxygen/src/mainpage.md @@ -1,5 +1,7 @@ # Overview {#mainpage} +## Introduction + This user manual describes the CMSIS DSP software library, a suite of common compute processing functions for use on Cortex-M and Cortex-A processor based devices. The library is divided into a number of functions each covering a specific category: @@ -26,9 +28,21 @@ The library is providing vectorized versions of most algorithms for Helium and o When using a vectorized version, provide a little bit of padding after the end of a buffer (3 words) because the vectorized code may read a little bit after the end of a buffer. You don't have to modify your buffers but just ensure that the end of buffer + padding is not outside of a memory region. +## Related projects + +### Python wrapper + A Python wrapper is also available with a Python API as close as possible to the C one. It can be used to start developing and testing an algorithm with NumPy and SciPy before writing the C version. It is available on [PyPI.org](https://pypi.org/project/cmsisdsp/). It can be installed with: `pip install cmsisdsp`. 
-## Using the Library {#using} +### Experimental C++ template extension + +This extension is a set of C++ headers. They just need to included to start using the features. + +Those headers are not yet part of the pack and you need to get them from the [github repository](https://github.com/ARM-software/CMSIS-DSP/tree/main/Include) + +More documentation about the @ref dsppp_main "DSP++" extension. + +## Using the CMSIS-DSP Library {#using} The library is released in source form. It is strongly advised to compile the library using `-Ofast` optimization to have the best performances. @@ -56,6 +70,7 @@ The table below explains the content of **ARM::CMSIS-DSP** pack. 📂 Include | Include files for using and building the lib 📂 PrivateInclude | Private include files for building the lib 📂 Source | Source files + 📂 dsppp | Experimental C++ teamplate extension 📄 ARM.CMSIS-DSP.pdsc | CMSIS-Pack description file 📄 LICENSE | License Agreement (Apache 2.0) diff --git a/Documentation/Doxygen/src/matrix.md b/Documentation/Doxygen/src/matrix.md new file mode 100644 index 00000000..c3c983b0 --- /dev/null +++ b/Documentation/Doxygen/src/matrix.md @@ -0,0 +1,168 @@ +# Matrix {#dsppp_matrix} + +Matrixes can be used similarly to vectors: + +```cpp +Matrix a; +Matrix b; +``` + +If the dimensions of the matrixes are not known at build time, you would instead write: + +``` +Matrix a(rows,cols); +Matrix b(rows,cols); +``` + +Once you have matrixes, you need to initialize them. A matrix is also a vector, so you can initialize it by indexing into the vector: + +```cpp +for(std::size_t i=0;i result = a * a + b; +``` + +The operators `+` and `*` are merged into the loop. `*` is the element-wise multiply. For the vector / matrix products you should use the operator `dot`. + +Note that fusion of operators will not work with `dot(Matrix, Matrix`). It is only supported with vectors : `dot(Vector,Vector)` or `dot(Matrix,Vector)`. 
+ +## VectorView + +We can create virtual vectors which are view of some slices of the matrix. + +### Row vector + +To set the second row to `0.0f`, you can do: + +``` +result.row(1) = 0.0f; +``` + +To set the odd elements of the 3rd row to `0.0f` we can do: + +``` +result.row<2>(2,1) = 0.0f; +``` + +The first argument `2` is the row number (starting from `0`). + +The second argument `1` is where is the row we start the view : element `1`. + +`<2>` is the stride known at built time. + +The `row` API is: + +```cpp +template +VectorView row(const index_t i,const index_t start=0,const index_t stop=C) + +``` + +`stop` is the index of the first element **after** the end of the view. + +`i` is the row index + +### Column vector + +There is a similar API for columns. + +Let's set the odd elements of columns 3 to `5.0f`: + +``` +result.col<2>(2,1) = 5.0f; +``` + +## MatrixView + +It is also possible to create a virtual matrix : a view onto a subset of the matrix. + +Let's add the bottom right corner of the matrix to itself: + +```cpp +result.sub(4,8,4,8) = result.sub(4,8,4,8) + result.sub(4,8,4,8) +``` + +The API is: + +```cpp +MatrixView sub(const index_t rs, + const index_t re, + const index_t cs, + const index_t ce) +``` + +You specify the row start and row end, then column start and column end. + +Note that the end is the first index **after** the end of your rows or columns. + +No stride is supported for matrix view in this version of the library. + +## Matrix operations + +In addition to the vector operations `+`,`-` and `*`, matrixes are supporting more operations: + +* `dot` for vector / matrix products +* `diagonal` to create a diagonal matrix from a vector. +* `identity` to create an identity matrix +* `tranpose` to create the transposed matrix +* `outer` for the outer product of two vectors + +### dot + +```cpp +result = dot(a,b); +``` + +The compiler may use the move semantic to copy the temporary result of the `dot` function to `result`. 
+ +In this case, no copy would occur and `result` after the assignment would be a vector allocated by `dot` so using the `TMP_ALLOC` . + +### diagonal + +```cpp +result = Matrix::diagonal(c); +``` + +### identity + +```cpp +result = Matrix::identity(); +``` + +### transpose + +```cpp +result = a.transpose(); +``` + +or + +```cpp +transposeTo(result,a); +``` + +### outer product + +```cpp +result = outer(c,c); +``` + diff --git a/Documentation/Doxygen/src/memory_allocator.md b/Documentation/Doxygen/src/memory_allocator.md new file mode 100644 index 00000000..a539a310 --- /dev/null +++ b/Documentation/Doxygen/src/memory_allocator.md @@ -0,0 +1,87 @@ +# Memory allocation {#dsppp_memory_allocator} + +By default, `malloc` is used. + +```cpp +Vector +``` + +is allocating a vector of dimension `NB` (known at build time) and datatype `float32_t`. + +The definition of the `Vector` template is: + +```cpp +template typename Allocator = TMP_ALLOC> +struct Vector:Vector_Base

+``` + +It means that by default the memory allocator is `TMP_ALLOC`. + +This `TMP_ALLOC` `#define` can be changed if you define it before including any header from the library. + +An allocator should implement a template like: + +```cpp +template +struct malloc_allocator { + /* Dynamic dimension allocations (L<0) */ + static char* allocate ( vector_length_t sz) noexcept; + + /* Dimension L know at build time (L > 0) */ + static char* allocate ( ) noexcept; + + static void destroy ( char* ptr ) noexcept; + +}; +``` + +It has no state because in practice we observed that compilers were generating more efficient code without state in the memory allocator template. + +If you don't want to use a `malloc` based memory allocator, you can replace it with your own memory allocator and implement an API like the one just shown in `malloc_allocator`. + +For instance, often in DSP pipelines, the dimensions of the vectors and matrixes are fixed and known at build time. +In that case, you could replace the memory allocator by one using memory pools. + +With memory pools, allocation is nearly cost free and there is no fragmentation. + +The test framework of the library is providing an example in `allocator.h` and `allocator.cpp`. + +There are two memory allocators: + +1. `stat_allocator` is a `malloc` based allocator that is making statistics on the memory allocations and how many buffers of each dimension is required + +2. `pool_allocator` can use the data generated by `stat_allocator`to pre-allocate memory pools that will be then used for the memory allocations. The memory pools are also creating aligned buffers. + +It is no more difficult (and less difficult) than allocating temporary buffers in CMSIS-DSP. + +You could define the `TMP_ALLOC` with: + +```cpp +#if defined(POOL_ALLOCATOR) +#define TMP_ALLOC pool_allocator +#else +#define TMP_ALLOC stat_allocator +#endif +``` + +You use `stat_allocator` by default. 
When your code is working, you switch to `pool_allocator` to have better performance and determinism. + +Another possibility is to use different vector types: + +```cpp +template +using PVector = Vector; +``` + +Note that you cannot avoid using `TMP_ALLOC` because some functions in the library are creating temporary objects. For instance, if you want to make an identity matrix, you can use ` mk_identity` that will make a memory allocation using `TMP_ALLOC` + +Also note that if you create a vector with: + +```cpp +Vector v(NB); +``` + +then the dimension `NB` is a runtime parameter. The memory pool allocator given as example in this library is only working with dimensions known at build time. For runtime dimensions, it is still using a `malloc`. + diff --git a/Documentation/Doxygen/src/memory_static_dynamic.md b/Documentation/Doxygen/src/memory_static_dynamic.md new file mode 100644 index 00000000..a1d91181 --- /dev/null +++ b/Documentation/Doxygen/src/memory_static_dynamic.md @@ -0,0 +1,35 @@ +# Static / dynamic {#dsppp_memory_static_dynamic} + +As we have seen in the previous sections, there are two kinds of vectors: + +* `Vector` with a dimension known at runtime +* `Vector` with a dimension known at build time + +The former vectors are called "dynamic" in this library. The latter are called "static". + +This naming "static" / "dynamic" is referring to the dimension. With "dynamic" vectors the same code can, at runtime, create vectors of different length based on a runtime length. + +With "static" vectors : the length is fixed at build time and will never change at runtime. + +Note that the library also has "static" / "dynamic" matrixes. So, we are going to use "objects" to cover both cases + +# Static objects + +The advantage of static objects is that the dimension is known at build time. The compiler can thus generate an algorithm that is specialized for those dimensions and thus more efficient. 
+ +With static objects it is also possible to use different memory allocators with better performance and determinism. + +But, with static objects, objects of different dimension are considered as different types. The compiler will generate different implementations so it will have an impact on the code size. + +If you need lots of objects of different dimensions, or if the dimensions are not known at build time, then you need to use dynamic objects + +# Dynamic objects + +With dynamic objects, the dimension is known at runtime. So objects of different dimensions have the same datatype and the compiler is generating only one implementation for all those objects. It cannot generate specialized implementations based on the dimension. It is better for code size, but the implementations will be less efficient. + +Also when the dimension is not known at build time, some instruction selection made by the C++ library at build time is no longer possible. It has an effect on performance since at runtime one must decide what's possible or not. It is mostly impacting matrixes where stride information is needed. + +With vector instructions one can use scatter / gather instructions and they require a stride. But there are constraints depending on the datatype and when the stride is too big for those instructions, they cannot be used. This check has to be done at runtime for dynamic objects. + +Finally, with dynamic objects, memory allocation can be an issue. You can mitigate the problem by reusing temporaries in your algorithms instead of re-allocating them. But it makes the implementation more difficult. See section about @ref dsppp_guidelines. + diff --git a/Documentation/Doxygen/src/template.md b/Documentation/Doxygen/src/template.md new file mode 100644 index 00000000..16b4994a --- /dev/null +++ b/Documentation/Doxygen/src/template.md @@ -0,0 +1,60 @@ +# What you need to know about C++ templates {#dsppp_template} + +## What is a template useful for ? 
+ +In CMSIS-DSP, you have functions like: + +* `arm_add_f32` +* `arm_add_f64` + +Without unrolling, the scalar implementation is the same but is duplicated (two different source files to maintain although they are nearly the same). + +One could try to reuse the same source for both functions using C preprocessor. But we would still have two different functions with different names at the end (both generated from the same C + C preprocessor macros) + +With C++ templates, we can achieve the same result in a better way since the C++ compiler will check the templates and typecheck them. In addition to that, both functions can share the same name. + +With C++ template, we could have a *generic* function `arm_add` taking as argument a pointer `T *pSrc` where `T` is a type variable ! + +When the function is used with a `float32_t *`, the compiler would generate code for a function using `float32_t`. + +And if the function is used with a `float64_t *`, the compiler would generate code for a function using `float64_t`. + +The generic `arm_add` source code is a template used to generate different implementations. It is like a code generator. + +And if the compiler is unable to generate an implementation because the type variable `T` is replaced by a type with no addition operator, then it would be detected by the compiler. + +## Templates for datatypes + +C++ templates also apply to structs and classes. + +For instance, we could have a template `Vector` and thus different types `Vector`, `Vector` ... + +There is another aspect of C++ templates that may be surprising : the types can contain numbers. + +For instance, one could have a type +`Vector` for a vector of `float` and of length `10`. The length being known at build time. + +The types `Vector` and `Vector` should be considered as different types because they have different lengths. The length is part of the type. + +What we said above for code generation applies. 
For a template algorithm using any kind of vector, the compiler would generate different code for different vector types. The code for a template algorithm using `Vector` would be different from the code for `Vector` because those two types are different. + + +## Implicit parameters + +A template can also have implicit parameters. + +For instance one could use `Vector` or `Vector`. + +In the first case, the length is an implicit parameter with a default value and it is equivalent to writing `Vector` where `DYNAMIC` could be a special value (negative for instance) used to tell the compiler that the length of the vector is not known at build time but only at runtime. + +Both variants may use totally different implementations. The `DYNAMIC` variant may contain a `length` field in the `struct` definition whereas other variants do not need this field since the length is known at build time. + +## How to use templates ? + +A template is just a C++ header. You only need to include this header to start using the template. There is nothing to build. + +## Example + +Now you can look at an @ref dsppp_vector_example "example with vector operations" showing how to use the library + + diff --git a/Documentation/Doxygen/src/vector.md b/Documentation/Doxygen/src/vector.md new file mode 100644 index 00000000..546338fe --- /dev/null +++ b/Documentation/Doxygen/src/vector.md @@ -0,0 +1,112 @@ +# Vector {#dsppp_vector} + +The use of vectors has been explained in @ref dsppp_vector_example "example with vector operations" and focusing on `float32_t`. + +The vector template is defined as: + +```cpp +template typename Allocator = TMP_ALLOC> +struct Vector:Vector_Base

+``` + +* `P` is the datatype of vector elements +* `L` is the static length of the vector (length known at build time). `L<0` when the length is dynamic and not known at build time. It is the default value. +* `Allocator` is the memory allocator. By default it is `TMP_ALLOC` that you can redefine since it is a macro +* `Vector_Base

` is providing the storage. A vector owns its storage buffer. + +## Q15 example + +Example with `Q15` is very similar: + +The vectors are defined: + +```cpp +Vector aQ15; +Vector bQ15; +Vector cQ15; +``` + +They are initialized: + +```cpp +for(int i = 0;i< NB;i++) +{ + aQ15[i] = bQ15[i] = cQ15[i] = Q15(i); +} +``` + +Here, the `Q15` value is initialized from the int value `i` and thus represents \f$ i/2^{15} \f$ + +Some computation is done + +```cpp +Vector dQ15 = aQ15 + bQ15 * cQ15; +``` + +The result is displayed: + +```cpp +std::cout << "Result = " << dQ15 ; +``` + +## VectorView + +A vector view is a virtual vector : a view of a vector. + +One can define a `VectorView` with: + +```cpp +auto subD = d.sub(2); +``` + +This is creating a virtual vector starting at index `2` (3rd element) of vector `d`. + +You can then operate with this virtual vector: + +```cpp +subD = subD + 2.0f; +``` + +If you display the vector `d`, you'll see that `2.0f` has been added to all elements starting from the 2rd one. + +`VectorView` do not own their memory. It is owned by the original vector. + +If you write: + +```cpp +x = y +``` + +and `x` and `y` are `VectorView`, no copy will occur. `x` will just reference the same data as `y`. If you want to copy you have to be explicit and write: + +```cpp +x = copy(y) +``` + +It is advised to always use the `copy` operator (even with normal vectors). + +Virtual vectors can have a stride: + +```cpp +d.sub<2>(1) = 0.0f; +``` + +This line sets the odd elements of the vector to `0.0f`. It is creating a vvirtual vector with stride `2` and starting at index `1` of first vector. + +Then, all elements of this virtual vector are set to `0.0f`. + +The `sub` API is: + +```cpp +template +VectorView sub(const index_t start=0,const index_t stop=L) +``` + +You can define: + +* The stride `S` : statically known and by default `1`. +* The start of the view (`0` by default) +* The end of the view (`L` by default : the length known at build time). 
Note that it is the first index **after** the end of the vector. + diff --git a/Documentation/Doxygen/src/vectorop.md b/Documentation/Doxygen/src/vectorop.md new file mode 100644 index 00000000..aed42944 --- /dev/null +++ b/Documentation/Doxygen/src/vectorop.md @@ -0,0 +1,100 @@ +# Vector operation example {#dsppp_vector_example} + +To compute: + +\f[ + +\overrightarrow{d} = \overrightarrow{a} + \overrightarrow{b} * \overrightarrow{c} + +\f] + +we need to: +1. Include the right header files +2. allocate the vectors +3. initialize the vectors +4. make the computation. + +# Include the headers + +The headers are not yet part of the CMSIS-DSP packs since they are experimental. You can get them from the [CMSIS-DSP github](https://github.com/ARM-software/CMSIS-DSP/CPP) + +```cpp +#include +#include + +using namespace arm_cmsis_dsp; +``` + +If fixed point datatypes are required, `#include ` should be used before `` + +Fixed point requires the use of CMSIS-DSP. + +# Creation of the vectors + +To create a vector `a` you would write: + +```cpp +constexpr int NB = 32; + +Vector a; +Vector b; +Vector c; +``` + +`Vector` is creating a vector of dimension `NB` (known at build time) and datatype `float32_t`. This creation is requiring some memory allocation and by default it is done with a `malloc`. + +It is possible to change the memory allocator for the vectors (and it is advised) to avoid using `malloc` and instead have deterministic allocation without fragmentation. + +See section @ref dsppp_memory_allocator "Memory allocation". + +Vectors of different dimensions are considered as being different types. + +If you don't know the dimension at build time, you can use a different type of vector with: + +```cpp +Vector a(NB); +``` + +For the trade-off between vector with build time dimension or runtime dimension please see the section @ref dsppp_memory_static_dynamic . + +# Initialization of the vectors + +You can index the vectors as normal C arrays. 
+ +```cpp +for(int i = 0;i< NB;i++) +{ + a[i] = b[i] = c[i] = i; +} +``` + +# Computation + +The computation can be written normally as : + +```cpp +Vector d = a + b * c; +``` + +Note that the computation can be parametrized with template arguments so the same computation could be used with any datatype or length. In that case you would have to define a template (and not just a normal function) and inside you would use something like: + +```cpp +Vector d = a + b * c; +``` + +where `T` is a type variable coming from the template. + +The operators `+`, `*` are computed in one pass with one loop : we have loop fusion and instead of having a loop per operator we have a loop for the whole computation. + +To understand fusion and how to extend it with new operators, see section @ref dsppp_fusion . + +For an overview of vector operators, see section @ref dsppp_vector . +For an overview of matrix operators, see section @ref dsppp_matrix . + +# Displaying the result + +The vectors can be displayed on `stdout` for debug purpose. 
+ +```cpp +std::cout << "Result = " << d ; +``` diff --git a/dsppp/.gitignore b/dsppp/.gitignore new file mode 100644 index 00000000..0cd7f9a8 --- /dev/null +++ b/dsppp/.gitignore @@ -0,0 +1,13 @@ +build_* +allocation/* +out/ +tmp/ +__pycache__/ +**.DS_Store +*.cprj +cprj/*.cbuild*.yml +dump_* +run_*.bat +ac6_results/ +gcc_results/ +clang_results/ diff --git a/dsppp/Examples/dot_product.cpp b/dsppp/Examples/dot_product.cpp new file mode 100644 index 00000000..c1ee8146 --- /dev/null +++ b/dsppp/Examples/dot_product.cpp @@ -0,0 +1,52 @@ + +#include "RTE_Components.h" +#include CMSIS_device_header + +#if defined(MPS3) +#include "cmsis_driver_config.h" +#include "stdout_USART.h" +#endif + +#include + +#include +#include + +using namespace arm_cmsis_dsp; + + +int main(void) +{ +#if defined(MPS3) + stdout_init(); +#endif + + std::cout << "Dot product example\r\n"; + + constexpr int NB = 32; + + Vector a; + Vector b; + Vector c; + Vector d; + + float32_t scale = 0.5; + + for(int i = 0;i< NB;i++) + { + a[i] = b[i] = c[i] = d[i] = i; + } + + float32_t r; + + r = dot(scale*(a+b),c*d); + + std::cout << "Result = " << r << "\r\n"; + + +#if defined(MPS3) + while(1); +#endif +} + + diff --git a/dsppp/Examples/matrix_op.cpp b/dsppp/Examples/matrix_op.cpp new file mode 100644 index 00000000..f9fa1231 --- /dev/null +++ b/dsppp/Examples/matrix_op.cpp @@ -0,0 +1,109 @@ + +#include "RTE_Components.h" +#include CMSIS_device_header + +#if defined(MPS3) +#include "cmsis_driver_config.h" +#include "stdout_USART.h" +#endif + +#include + +#include +#include +#include + +using namespace arm_cmsis_dsp; + + +int main(void) +{ +#if defined(MPS3) + stdout_init(); +#endif + + std::cout << "Matrix operation examples\r\n"; + + constexpr int ROWS = 8; + constexpr int COLS = 8; + + Matrix a; + Matrix b; + + for(std::size_t i=0;i result = a * a + b; + + std::cout << "Result = " << std::endl << result ; + + // Vector views + + // Rows + result.row(1) = 0.0f; + std::cout << "Result = " << std::endl << 
result ; + + // Row with stride + // setting odd elements of 3rd row to 0 + result.row<2>(2,1) = 0.0f; + std::cout << "Result = " << std::endl << result ; + + // Column with stride + result.col<2>(2,1) = 5.0f; + std::cout << "Result = " << std::endl << result ; + + // Matrix view + result.sub(4,8,4,8) = result.sub(4,8,4,8) + result.sub(4,8,4,8); + std::cout << "Result = " << std::endl << result ; + + // operators + // dot + result = dot(a,b); + std::cout << "Result = " << std::endl << result ; + + // diagonal + Vector c; + + for(int i = 0;i< ROWS;i++) + { + c[i] = i; + } + result = Matrix::diagonal(c); + + std::cout << "Result = " << std::endl << result ; + + // identity matrix + result = Matrix::identity(); + + std::cout << "Result = " << std::endl << result ; + + // transpose matrix + result = a.transpose(); + + std::cout << "Result = " << std::endl << result ; + + transposeTo(result,a); + + std::cout << "Result = " << std::endl << result ; + + // outer product + result = outer(c,c); + std::cout << "Result = " << std::endl << result ; + + +#if defined(MPS3) + while(1); +#endif +} + + diff --git a/dsppp/Examples/vector_op.cpp b/dsppp/Examples/vector_op.cpp new file mode 100644 index 00000000..6964fc3b --- /dev/null +++ b/dsppp/Examples/vector_op.cpp @@ -0,0 +1,83 @@ + +#include "RTE_Components.h" +#include CMSIS_device_header + +#if defined(MPS3) +#include "cmsis_driver_config.h" +#include "stdout_USART.h" +#endif + +#include + +#include +#include +#include + +using namespace arm_cmsis_dsp; + + +int main(void) +{ +#if defined(MPS3) + stdout_init(); +#endif + + std::cout << "Vector operation examples\r\n"; + + constexpr int NB = 32; + + // float32 example + + Vector a; + Vector b; + Vector c; + + for(int i = 0;i< NB;i++) + { + a[i] = b[i] = c[i] = i; + } + + + Vector d = a + b * c; + + + std::cout << "Result = " << d ; + + // Vector view example 1 + auto subD = d.sub(2); + subD = subD + 2.0f; + + // d vector has been modified starting from the 3rd element + // 
(index 2) + std::cout << "Result = " << d ; + + // Now we set all odd elements to 0. + d.sub<2>(1) = 0.0f; + std::cout << "Result = " << d ; + + + // Q15 example + Vector aQ15; + Vector bQ15; + Vector cQ15; + + for(int i = 0;i< NB;i++) + { + aQ15[i] = bQ15[i] = cQ15[i] = Q15(i); + } + + + Vector dQ15 = aQ15 + bQ15 * cQ15; + + + std::cout << "Result = " << dQ15 ; + + + + +#if defined(MPS3) + while(1); +#endif +} + + diff --git a/dsppp/Include/dsppp/DSP/basic.hpp b/dsppp/Include/dsppp/DSP/basic.hpp new file mode 100644 index 00000000..9032412e --- /dev/null +++ b/dsppp/Include/dsppp/DSP/basic.hpp @@ -0,0 +1,256 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_DSP +#undef ARM_MATH_MVEI +#undef ARM_MATH_MVEF +#undef ARM_MATH_NEON +#endif + +/** \addtogroup ARCHALG + * \addtogroup DSPALG DSP Extension specific algorithm + * \ingroup ARCHALG + * @{ + */ + +#if defined(ARM_MATH_DSP) +#if !defined(ARM_MATH_MVEI) && !defined(ARM_MATH_MVEF) && !defined(ARM_MATH_NEON) + +#define DSP_UNROLL 1 + +template() && + IsVector::value && + SameElementType::value,bool>::type = true> +inline void _Fill(DST &v, + const T val, + vector_length_t l, + const DSP* = nullptr) +{ + constexpr int nb_lanes = vector_traits::nb_lanes; + index_t i; + + for(i=0 ; i <= l-(nb_lanes<() && + must_use_matrix_idx() && + SameElementType::value,bool>::type = true> +inline void _Fill2D(DST &v, + const T val, + const vector_length_t rows, + const vector_length_t cols, + const DSP* = nullptr) +{ + constexpr int nb_lanes = vector_traits::nb_lanes; + index_t row=0; + + for(; row <= rows-(1<() && + vector_idx_pair(),bool>::type = true> +inline void eval(DA &v, + const DB& other, + const vector_length_t l, + const DSP* = nullptr) +{ + using T = typename traits::Scalar; + constexpr int nb_lanes = vector_traits::nb_lanes; + constexpr unsigned int U = DSP_UNROLL; + index_t i; + + for(i=0 ; i <= l-(nb_lanes<() && + must_use_matrix_idx_pair(),bool>::type = true> +inline void eval2D(DA 
&v, + const DB& other, + const vector_length_t rows, + const vector_length_t cols, + const DSP* = nullptr) +{ + using T = typename traits::Scalar; + constexpr int nb_lanes = vector_traits::nb_lanes; + index_t row=0; + + for(; row <= rows-(1<() && + vector_idx_pair(),bool>::type = true> +inline DotResult _dot(const DA& a, + const DB& b, + const vector_length_t l, + const DSP* = nullptr) +{ + using Acc = DotResult; + using T = typename traits::Scalar; + using Temp = typename vector_traits::temp_accumulator; + constexpr int nb_lanes = vector_traits::nb_lanes; + constexpr unsigned int U = DSP_UNROLL; + index_t i; + + Acc acc = Acc{}; + Temp vacc = vector_traits::temp_acc_zero(); + + for(i=0 ; i <= l-(nb_lanes<() && + vector_idx_pair(),bool>::type = true> +inline void _swap(DA&& a, + DB&& b, + const vector_length_t l, + const DSP* = nullptr) +{ + using Scalar = typename ElementType::type; + using Vector = typename vector_traits::vector; + + constexpr int nb_lanes = vector_traits::type>::nb_lanes; + index_t i=0; + Vector tmpa,tmpb; + + for(i=0 ; i <= l-nb_lanes; i += nb_lanes) + { + tmpa = a.vector_op(i); + tmpb = b.vector_op(i); + b.vector_store(i,tmpa); + a.vector_store(i,tmpb); + } + + for(;i::Scalar,Q15>::value && + number_traits::Scalar>::is_fixed,bool>::type = true> +__STATIC_INLINE void _arm_mat_trans( + const MA &src, + MB &dst, + const DSP* = nullptr) +{ + using T = typename traits::Scalar; + using VEC = typename vector_traits::vector; + constexpr int nb_lanes = vector_traits::nb_lanes; + + T *pIn = src.ptr(); /* input data matrix pointer */ + T *pOut = dst.ptr(); /* output data matrix pointer */ + uint16_t nRows = src.rows(); /* number of rows */ + uint16_t nCols = src.columns(); /* number of columns */ + uint32_t col, row = nRows, i = 0U; /* Loop counters */ + + VEC in; /* variable to hold temporary output */ + + /* Matrix transpose by exchanging the rows with columns */ + /* row loop */ + do + { + /* Pointer pOut is set to starting address of column being 
processed */ + pOut = dst.ptr() + i; + + + /* Loop unrolling: Compute 4 outputs at a time */ + col = nCols / (2*nb_lanes); + + while (col > 0U) /* column loop */ + { + /* Read two elements from row */ + in = inner::vload1<1>(pIn); + pIn += nb_lanes; + + /* Unpack and store one element in destination */ + *pOut = Q15(in.v); + /* Update pointer pOut to point to next row of transposed matrix */ + pOut += dst.stride(); + + /* Unpack and store second element in destination */ + *pOut = Q15((in.v & (q31_t) 0xffff0000) >> 16); + /* Update pointer pOut to point to next row of transposed matrix */ + pOut += dst.stride(); + + /* Read two elements from row */ + in = inner::vload1<1>(pIn); + pIn += nb_lanes; + + /* Unpack and store one element in destination */ + *pOut = Q15(in.v); + /* Update pointer pOut to point to next row of transposed matrix */ + pOut += dst.stride(); + + /* Unpack and store second element in destination */ + *pOut = Q15((in & (q31_t) 0xffff0000) >> 16); + /* Update pointer pOut to point to next row of transposed matrix */ + pOut += dst.stride(); + + /* Decrement column loop counter */ + col--; + } + + /* Loop unrolling: Compute remaining outputs */ + col = nCols & (2*nb_lanes-1); + while (col > 0U) + { + /* Read and store input element in destination */ + *pOut = *pIn++; + + /* Update pointer pOut to point to next row of transposed matrix */ + pOut += dst.stride(); + + /* Decrement column loop counter */ + col--; + } + + pIn += (src.stride()-nCols); + + i ++; + + /* Decrement row loop counter */ + row--; + + } while (row > 0U); /* row loop end */ + + + +} + + +template::Scalar,Q31>::value && + number_traits::Scalar>::is_fixed,bool>::type = true> +inline void _dot_m_v(RES &res, + const M&m,const V&v, + const DSP* = nullptr) +{ + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + constexpr int nb_lanes = vector_traits::nb_lanes; + + uint32_t numRows = m.rows(); + 
uint32_t numCols = m.columns(); + const T *pSrcA = m.ptr(); + const T *pInA1; /* input data matrix pointer A of Q15 type */ + const T *pInA2; /* input data matrix pointer A of Q15 type */ + const T *pInA3; /* input data matrix pointer A of Q15 type */ + const T *pInA4; /* input data matrix pointer A of Q15 type */ + T *px; /* Temporary output data matrix pointer */ + uint16_t i, row; /* loop counters */ + int16_t colCnt; + VEC matData, matData2, vecData, vecData2; + T tmpData; + + + /* Process 4 rows at a time */ + row = numRows >> 2; + i = 0u; + px = res.ptr(); + + /* The following loop performs the dot-product of each row in pSrcA with the vector */ + /* row loop */ + while (row > 0) { + /* Initialize accumulators */ + ACC sum1 = ACC{}; + ACC sum2 = ACC{}; + ACC sum3 = ACC{}; + ACC sum4 = ACC{}; + + /* For every row wise process, the pInVec pointer is set + ** to the starting address of the vector */ + + /* Loop unrolling: process 2 columns per iteration */ + + /* Initialize pointers to the starting address of the column being processed */ + pInA1 = pSrcA + i; + pInA2 = pInA1 + m.stride(); + pInA3 = pInA2 + m.stride(); + pInA4 = pInA3 + m.stride(); + + // Main loop: matrix-vector multiplication + for(colCnt = 0 ; colCnt <= (int16_t)numCols - nb_lanes; colCnt += nb_lanes) + { + // Read 2 values from vector + vecData = v.vector_op(colCnt); + + // Read 8 values from the matrix - 2 values from each of 4 rows, and do multiply accumulate + matData = inner::vload1<1> (pInA1); + pInA1 += nb_lanes; + sum1 = inner::vmacc(sum1, matData, vecData); + + matData = inner::vload1<1> (pInA2); + pInA2 += nb_lanes; + sum2 = inner::vmacc(sum2, matData, vecData); + + matData = inner::vload1<1> (pInA3); + pInA3 += nb_lanes; + sum3 = inner::vmacc(sum3, matData, vecData); + + matData = inner::vload1<1> (pInA4); + pInA4 += nb_lanes; + sum4 = inner::vmacc(sum4, matData, vecData); + + // Decrement the loop counter + } + + /* process any remaining columns */ + + for(; colCnt < 
(int16_t)numCols; colCnt ++) + { + tmpData = v[colCnt]; + sum1 = inner::mac(sum1,*pInA1++ , tmpData); + sum2 = inner::mac(sum2,*pInA2++ , tmpData); + sum3 = inner::mac(sum3,*pInA3++ , tmpData); + sum4 = inner::mac(sum4,*pInA4++ , tmpData); + } + + /* Saturate and store the result in the destination buffer */ + *px++ = inner::from_accumulator(sum1); + *px++ = inner::from_accumulator(sum2); + *px++ = inner::from_accumulator(sum3); + *px++ = inner::from_accumulator(sum4); + + i = i + m.stride() * 4; + + /* Decrement the row loop counter */ + row--; + } + + /* process any remaining rows */ + row = numRows & 3u; + while (row > 0) { + + ACC sum = ACC{}; + pInA1 = pSrcA + i; + + // loop unrolling - process 4 elements at a time + + for(colCnt = 0 ; colCnt <= (int16_t)numCols - 2*nb_lanes; colCnt += 2*nb_lanes) + { + vecData = v.vector_op(colCnt); + vecData2 = v.vector_op(colCnt+nb_lanes); + + matData = inner::vload1<1>(pInA1); + pInA1 += nb_lanes; + matData2 = inner::vload1<1>(pInA1); + pInA1 += nb_lanes; + sum = inner::vmacc(sum, matData, vecData); + sum = inner::vmacc(sum, matData2, vecData2); + } + + // process remainder of row + for(; colCnt < (int16_t)numCols; colCnt ++) + { + + sum = inner::mac(sum, *pInA1++ , v[colCnt]); + } + *px++ = inner::from_accumulator(sum); + i = i + m.stride(); + row--; + } +} + +template::Scalar,Q31>::value && + number_traits::Scalar>::is_fixed,bool>::type = true> +__STATIC_INLINE void _dot_m_m(const MA&pSrcA,const MB&pSrcB, + RES &&pDst, + const TMP &BT, + const DSP* = nullptr) +{ + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + constexpr int nb_lanes = vector_traits::nb_lanes; + + ACC sum; /* Accumulator */ + + + T *pSrcBT = BT.ptr(); /* Input data matrix pointer for transpose */ + T *pInA = pSrcA.ptr(); /* Input data matrix pointer A of Q15 type */ + T *pInB = pSrcB.ptr(); /* Input data matrix pointer B of Q15 type */ + T *px; /* Temporary output 
data matrix pointer */ + uint16_t numRowsA = pSrcA.rows(); /* Number of rows of input matrix A */ + uint16_t numColsB = pSrcB.columns(); /* Number of columns of input matrix B */ + uint16_t numColsA = pSrcA.columns(); /* Number of columns of input matrix A */ + uint16_t numRowsB = pSrcB.rows(); /* Number of rows of input matrix B */ + uint32_t col, i = 0U, row = numRowsB, colCnt; /* Loop counters */ + + VEC inA1, inB1, inA2, inB2; + + + /* Reset variables for usage in following multiplication process */ + row = numRowsA; + i = 0U; + px = pDst.ptr(); + + /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */ + /* row loop */ + do + { + /* For every row wise process, column loop counter is to be initiated */ + col = numColsB; + + /* For every row wise process, pIn2 pointer is set to starting address of transposed pSrcB data */ + pInB = pSrcBT; + + /* column loop */ + do + { + /* Set variable sum, that acts as accumulator, to zero */ + sum = ACC{}; + + /* Initiate pointer pInA to point to starting address of column being processed */ + pInA = pSrcA.ptr() + i; + + /* Apply loop unrolling and compute 2 MACs simultaneously. */ + colCnt = numColsA / (2*nb_lanes); + + /* matrix multiplication */ + while (colCnt > 0U) + { + /* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... + a(m,p) * b(p,n) */ + + /* read real and imag values from pSrcA and pSrcB buffer */ + inA1 = inner::vload1<1> (pInA); + pInA += nb_lanes; + inB1 = inner::vload1<1> (pInB); + pInB += nb_lanes; + + inA2 = inner::vload1<1> (pInA); + pInA += nb_lanes; + inB2 = inner::vload1<1> (pInB); + pInB += nb_lanes; + + /* Multiply and Accumulates */ + sum = inner::vmacc(sum, inA1, inB1); + sum = inner::vmacc(sum, inA2, inB2); + + /* Decrement loop counter */ + colCnt--; + } + + /* process remaining column samples */ + colCnt = numColsA & (2*nb_lanes-1); + + while (colCnt > 0U) + { + /* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... 
+ a(m,p) * b(p,n) */ + sum = inner::mac(sum ,*pInA++ , *pInB++); + + /* Decrement loop counter */ + colCnt--; + } + + /* Saturate and store result in destination buffer */ + *px = inner::from_accumulator(sum); + px++; + + /* Decrement column loop counter */ + col--; + + } while (col > 0U); + + i = i + pSrcA.stride(); + + /* Decrement row loop counter */ + row--; + + } while (row > 0U); + +} +#endif +#endif + +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/DSP/memory.hpp b/dsppp/Include/dsppp/DSP/memory.hpp new file mode 100644 index 00000000..6aa19057 --- /dev/null +++ b/dsppp/Include/dsppp/DSP/memory.hpp @@ -0,0 +1,98 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_DSP +#undef ARM_MATH_MVEI +#undef ARM_MATH_MVEF +#undef ARM_MATH_NEON +#endif + + +namespace arm_cmsis_dsp { + + +/** \addtogroup DSPALG + * @{ + */ + +#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) << 0) & (int32_t)0x000000FF) | \ + (((int32_t)(v1) << 8) & (int32_t)0x0000FF00) | \ + (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \ + (((int32_t)(v3) << 24) & (int32_t)0xFF000000) ) + + +__STATIC_FORCEINLINE int32_t read_q15x2 ( + Q15 const * pQ15) +{ + int32_t val; + const int16_t *p=reinterpret_cast(pQ15); + +#ifdef __ARM_FEATURE_UNALIGNED + memcpy (&val, p, 4); +#else + val = (p[1] << 16) | (p[0] & 0x0FFFF) ; +#endif + + return (val); +}; + + + +__STATIC_FORCEINLINE void write_q15x2 ( + Q15 * pQ15, + int32_t value) +{ + int32_t val = value; + int16_t *p=reinterpret_cast(pQ15); + +#ifdef __ARM_FEATURE_UNALIGNED + memcpy (p, &val, 4); +#else + p[0] = (int16_t)(val & 0x0FFFF); + p[1] = (int16_t)(val >> 16); +#endif +}; + + +__STATIC_FORCEINLINE int32_t read_q7x4 ( + Q7 const * pQ7) +{ + int32_t val; + const int8_t *p=reinterpret_cast(pQ7); + +#ifdef __ARM_FEATURE_UNALIGNED + memcpy (&val, p, 4); +#else + val =((p[3] & 0x0FF) << 24) | ((p[2] & 0x0FF) << 16) | ((p[1] & 0x0FF) << 8) | (p[0] & 0x0FF); +#endif + return (val); +}; + + + + + + 
+__STATIC_FORCEINLINE void write_q7x4 ( + Q7 *& pQ7, + int32_t value) +{ + int8_t *p=reinterpret_cast(pQ7); + int32_t val = value; +#ifdef __ARM_FEATURE_UNALIGNED + memcpy (p, &val, 4); +#else + p[0] = (q7_t)(val & 0x0FF); + p[1] = (q7_t)((val >> 8) & 0x0FF); + p[2] = (q7_t)((val >> 16) & 0x0FF); + p[3] = (q7_t)((val >> 24) & 0x0FF); + +#endif +}; + +/*! @} */ + +} + diff --git a/dsppp/Include/dsppp/DSP/num_features.hpp b/dsppp/Include/dsppp/DSP/num_features.hpp new file mode 100644 index 00000000..e13f1a92 --- /dev/null +++ b/dsppp/Include/dsppp/DSP/num_features.hpp @@ -0,0 +1,14 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/* + +vreduce is going from vector accumulator to scalar accumulator +from_accumulator is going from scalar accumulator to scalar datatype + + +*/ + +#include "q7.hpp" +#include "q15.hpp" diff --git a/dsppp/Include/dsppp/DSP/q15.hpp b/dsppp/Include/dsppp/DSP/q15.hpp new file mode 100644 index 00000000..5320f922 --- /dev/null +++ b/dsppp/Include/dsppp/DSP/q15.hpp @@ -0,0 +1,301 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_DSP +#undef ARM_MATH_MVEI +#undef ARM_MATH_MVEF +#undef ARM_MATH_NEON +#endif + +/** \addtogroup DSPNumber DSP extension specific number definitions + * \ingroup NUMBER + * @{ + * \addtogroup DSPQ15Number Q15 + * \ingroup DSPNumber + * @{ + */ + +#if defined(ARM_MATH_DSP) +#if !defined(ARM_MATH_MVEI) && !defined(ARM_MATH_MVEF) && !defined(ARM_MATH_NEON) + +/** + * @brief Representation of a vector when DSP extension supported + */ +struct Q15DSPVector { + /** + * @brief Create new 0 initialized vector + */ + Q15DSPVector():v(0){}; + + /** + * @brief Create vector initialized from value + * + * @param[in] val The value + */ + explicit Q15DSPVector(int32_t val):v(val){}; + + /** + * @brief Return value in vector + */ + operator int32_t(){return v;}; + +int32_t v; +}; + +/** + * @brief Vector description for Q15 with DSP extensions + */ +template<> +struct vector_traits::type> +{ + //! 
Scalar datatype + typedef Q15 type; + + //! Storage datatype + typedef type::value_type storage_type; + + //! Vector datatype + typedef Q15DSPVector vector; + + //! Accumulator datatype + typedef Q<33,30> temp_accumulator; + + /* + + The evaluators are not using any predication and instead + use additional code after the loop to manage the tail. + + So, no inner function with predicate_t is required. + + Fusion operators still have call to inner operator with + predicate but they are not called in this context. + + */ + + /** + * Dummy type since there is no predicated loop for + * DSP extensions + */ + typedef uint32_t predicate_t; + + //! Has some vector instructions + static constexpr bool has_vector = true; + + //! Is not float + static constexpr bool is_float = false; + + //! Is fixed point + static constexpr bool is_fixed = true; + + //! No predicated loops + static constexpr bool has_predicate = false; + + //! Number of lanes + static constexpr int nb_lanes = 2; + + /** + * @brief Zero accumulator + * + * @return Zero accumulator + */ + static Q<33,30> temp_acc_zero() + { + return(Q<33,30>()); + } + + /** + * @brief Value to write in a lane to write 0 + * + * @return Zero value for a lane + */ + static constexpr int16_t zero_lane() {return 0;}; + + /** + * @brief Convert to lane value + * + * @param[in] x Value + * + * @return Lane value + */ + static constexpr int16_t lane_value(const Q15 x) {return x.v;}; + + +}; + + +/** + * \ingroup DSPNumber + */ +namespace inner { + + /* Needed to build but not used */ + template<> + struct vctpq{ + static uint32_t mk(uint32_t v) + { + return(v); + }; + }; + + /** + * @brief Vector const + * + * @param[in] val The value + * + * @return The static forceinline. 
+ */ + __STATIC_FORCEINLINE Q15DSPVector vconst(Q15 val) + { + return(Q15DSPVector(__PKHBT(val.v, val.v, 16))); + } + + + __STATIC_FORCEINLINE Q15DSPVector vneg(const Q15DSPVector a) + { + return(Q15DSPVector(__QSUB16(0, a.v))); + }; + + __STATIC_FORCEINLINE Q15DSPVector vadd(const Q15DSPVector a, + const Q15DSPVector b) + { + return(Q15DSPVector(__QADD16(a.v,b.v))); + }; + + __STATIC_FORCEINLINE Q15DSPVector vadd(const Q15DSPVector a, + const Q15 b) + { + return(Q15DSPVector(__QADD16(a.v,vconst(b).v))); + }; + + __STATIC_FORCEINLINE Q15DSPVector vadd(const Q15 a, + const Q15DSPVector b) + { + return(Q15DSPVector(__QADD16(vconst(a).v,b.v))); + }; + + __STATIC_FORCEINLINE Q15DSPVector vsub(const Q15DSPVector a, + const Q15DSPVector b) + { + return(Q15DSPVector(__QSUB16(a.v,b.v))); + }; + + __STATIC_FORCEINLINE Q15DSPVector vsub(const Q15DSPVector a, + const Q15 b) + { + return(Q15DSPVector(__QSUB16(a.v,vconst(b).v))); + }; + + __STATIC_FORCEINLINE Q15DSPVector vsub(const Q15 a, + const Q15DSPVector b) + { + return(Q15DSPVector(__QSUB16(vconst(a).v,b.v))); + }; + + __STATIC_FORCEINLINE Q15DSPVector vmul(const Q15DSPVector a, + const Q15DSPVector b) + { + q31_t mul1,mul2; + q15_t out1,out2; + + mul1 = (q31_t) ((q15_t) (a.v ) * (q15_t) (b.v )); + mul2 = (q31_t) ((q15_t) (a.v >> 16) * (q15_t) (b.v >> 16)); + + out1 = (q15_t) __SSAT(mul1 >> 15, 16); + out2 = (q15_t) __SSAT(mul2 >> 15, 16); + return(Q15DSPVector(__PKHBT(out1, out2, 16))); + }; + + + __STATIC_FORCEINLINE Q15DSPVector vmul(const Q15DSPVector a, + const Q15 b) + { + return(vmul(a,vconst(b))); + }; + + __STATIC_FORCEINLINE Q15DSPVector vmul(const Q15 a, + const Q15DSPVector b) + { + return(vmul(vconst(a),b)); + }; + + + template::type = true> + inline Q15DSPVector vload1(const Q15 *p) + { + return(Q15DSPVector(read_q15x2(p))); + }; + + + template1),bool>::type = true> + inline Q15DSPVector vload1(const Q15 *p) + { + Q15 a = p[0]; + Q15 b = p[S]; + + return(Q15DSPVector(__PKHBT(a.v, b.v, 16))); + }; + + + // 
Dynamic stride + inline Q15DSPVector vload1(const Q15 *p,index_t stride) + { + Q15 a = p[0]; + Q15 b = *(p+stride); + + return(Q15DSPVector(__PKHBT(a.v, b.v, 16))); + } + + template::type = true> + inline void vstore1(Q15 *p,const Q15DSPVector val) + { + write_q15x2 (p, val.v); + }; + + template1),bool>::type = true> + inline void vstore1(Q15 *p,const Q15DSPVector val) + { + p[0] = Q15(val.v & 0x0FFFF); + p[S] = Q15(val.v >> 16); + }; + + // dynamic stride + inline void vstore1(Q15 *p,const index_t stride, + const Q15DSPVector val) + { + p[0] = Q15(val.v & 0x0FFFF); + *(p+stride) = Q15(val.v >> 16); + } + + __STATIC_FORCEINLINE Q<33,30> vmacc(const Q<33,30> sum, + const Q15DSPVector vala, + const Q15DSPVector valb) + { + return(Q<33,30>(__SMLALD(vala.v,valb.v,sum.v))); + }; + + __STATIC_FORCEINLINE Q<33,30> vmacc(const Q15DSPVector vala, + const Q15DSPVector valb) + { + return(Q<33,30>(__SMLALD(vala.v,valb.v,0))); + }; + + __STATIC_FORCEINLINE Q<33,30> vreduce(const Q<33,30> sum) + { + return(sum); + }; + + +}; + + +#endif +#endif + +/*! @} */ +/*! 
@} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/DSP/q7.hpp b/dsppp/Include/dsppp/DSP/q7.hpp new file mode 100644 index 00000000..7c218294 --- /dev/null +++ b/dsppp/Include/dsppp/DSP/q7.hpp @@ -0,0 +1,264 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_DSP +#undef ARM_MATH_MVEI +#undef ARM_MATH_MVEF +#undef ARM_MATH_NEON +#endif + +/** \addtogroup DSPNumber DSP extension specific number definitions + * \ingroup NUMBER + * @{ + * \addtogroup DSPQ7Number Q7 + * \ingroup DSPNumber + * @{ + */ + +#if defined(ARM_MATH_DSP) +#if !defined(ARM_MATH_MVEI) && !defined(ARM_MATH_MVEF) && !defined(ARM_MATH_NEON) + + +struct Q7DSPVector { + Q7DSPVector():v(0){}; + explicit Q7DSPVector(int32_t val):v(val){}; + operator int32_t(){return v;}; +int32_t v; +}; + +template<> +struct vector_traits::type> +{ + typedef Q7 type; + typedef type::value_type storage_type; + typedef Q7DSPVector vector; + typedef Q<17,14> temp_accumulator; + + /* + + The evaluators are not using any predication and instead + use additional code after the loop to manage the tail. + + So, no inner function with predicate_t is required. + + Fusion operators still have call to inner operator with + predicate but they are not called in this context. 
+ + */ + typedef uint32_t predicate_t; + + + static constexpr bool has_vector = true; + static constexpr bool is_float = false; + static constexpr bool is_fixed = true; + static constexpr bool has_predicate = false; + + static constexpr int nb_lanes = 4; + + static Q<17,14> temp_acc_zero() + { + return(Q<17,14>()); + } + + static constexpr int8_t zero_lane() {return 0;}; + + static constexpr int8_t lane_value(const Q7 x) {return x.v;}; + + +}; + + + +namespace inner { + + /* Needed to build but not used */ + template<> + struct vctpq{ + static uint32_t mk(uint32_t v) + { + return(v); + }; + }; + + __STATIC_FORCEINLINE Q7DSPVector vconst(Q7 val) + { + return(Q7DSPVector(__PACKq7(val.v, val.v, val.v, val.v))); + } + + + __STATIC_FORCEINLINE Q7DSPVector vneg(const Q7DSPVector a) + { + return(Q7DSPVector(__QSUB8(0, a.v))); + }; + + __STATIC_FORCEINLINE Q7DSPVector vadd(const Q7DSPVector a, + const Q7DSPVector b) + { + return(Q7DSPVector(__QADD8(a.v,b.v))); + }; + + __STATIC_FORCEINLINE Q7DSPVector vadd(const Q7DSPVector a, + const Q7 b) + { + return(Q7DSPVector(__QADD8(a.v,vconst(b).v))); + }; + + __STATIC_FORCEINLINE Q7DSPVector vadd(const Q7 a, + const Q7DSPVector b) + { + return(Q7DSPVector(__QADD8(vconst(a).v,b.v))); + }; + + __STATIC_FORCEINLINE Q7DSPVector vsub(const Q7DSPVector a, + const Q7DSPVector b) + { + return(Q7DSPVector(__QSUB8(a.v,b.v))); + }; + + __STATIC_FORCEINLINE Q7DSPVector vsub(const Q7DSPVector a, + const Q7 b) + { + return(Q7DSPVector(__QSUB8(a.v,vconst(b).v))); + }; + + __STATIC_FORCEINLINE Q7DSPVector vsub(const Q7 a, + const Q7DSPVector b) + { + return(Q7DSPVector(__QSUB8(vconst(a).v,b.v))); + }; + + __STATIC_FORCEINLINE Q7DSPVector vmul(const Q7DSPVector a, + const Q7DSPVector b) + { + q7_t out1, out2, out3, out4; + q15_t mul1,mul2,mul3,mul4; + + mul1 = (q15_t) ((q7_t) (a.v ) * (q7_t) (b.v )); + mul2 = (q15_t) ((q7_t) (a.v >> 8) * (q7_t) (b.v >> 8)); + mul3 = (q15_t) ((q7_t) (a.v >> 16) * (q7_t) (b.v >> 16)); + mul4 = (q15_t) ((q7_t) (a.v 
>> 24) * (q7_t) (b.v >> 24)); + + out1 = (q7_t) __SSAT(mul1 >> 7, 8); + out2 = (q7_t) __SSAT(mul2 >> 7, 8); + out3 = (q7_t) __SSAT(mul3 >> 7, 8); + out4 = (q7_t) __SSAT(mul4 >> 7, 8); + return(Q7DSPVector(__PACKq7(out1,out2,out3,out4))); + }; + + + __STATIC_FORCEINLINE Q7DSPVector vmul(const Q7DSPVector a, + const Q7 b) + { + return(vmul(a,vconst(b))); + }; + + __STATIC_FORCEINLINE Q7DSPVector vmul(const Q7 a, + const Q7DSPVector b) + { + return(vmul(vconst(a),b)); + }; + + + template::type = true> + inline Q7DSPVector vload1(const Q7 *p) + { + return(Q7DSPVector(read_q7x4(p))); + }; + + + template1),bool>::type = true> + inline Q7DSPVector vload1(const Q7 *p) + { + Q7 a = p[0]; + Q7 b = p[S]; + Q7 c = p[2*S]; + Q7 d = p[3*S]; + + return(Q7DSPVector(__PACKq7(a.v, b.v, c.v,d.v))); + }; + + + // Dynamic stride + inline Q7DSPVector vload1(const Q7 *p,index_t stride) + { + Q7 a = p[0]; + Q7 b = *(p+stride); + Q7 c = *(p+2*stride); + Q7 d = *(p+3*stride); + + return(Q7DSPVector(__PACKq7(a.v, b.v, c.v,d.v))); + } + + template::type = true> + inline void vstore1(Q7 *p,const Q7DSPVector val) + { + write_q7x4 (p, val.v); + }; + + template1),bool>::type = true> + inline void vstore1(Q7 *p,const Q7DSPVector val) + { + p[0] = Q7(val.v & 0x0FF); + p[S] = Q7(val.v >> 8); + p[2*S] = Q7(val.v >> 16); + p[3*S] = Q7(val.v >> 24); + }; + + // dynamic stride + inline void vstore1(Q7 *p,const index_t stride, + const Q7DSPVector val) + { + p[0] = Q7(val.v & 0x0FF); + *(p+stride) = Q7(val.v >> 8); + *(p+2*stride) = Q7(val.v >> 16); + *(p+3*stride) = Q7(val.v >> 24); + } + + __STATIC_FORCEINLINE Q<17,14> vmacc(const Q<17,14> sum, + const Q7DSPVector vala, + const Q7DSPVector valb) + { + q31_t inA1, inA2, inB1, inB2; + q31_t s; + inA1 = __SXTB16(__ROR(vala.v, 8)); + /* extract reminaing two samples */ + inA2 = __SXTB16(vala.v); + /* extract two q7_t samples to q15_t samples */ + inB1 = __SXTB16(__ROR(valb.v, 8)); + /* extract reminaing two samples */ + inB2 = __SXTB16(valb.v); + + /* 
multiply and accumulate two samples at a time */ + s = __SMLAD(inA1, inB1, sum.v); + s = __SMLAD(inA2, inB2, s); + + return(Q<17,14>(s)); + }; + + __STATIC_FORCEINLINE Q<17,14> vmacc(const Q7DSPVector vala, + const Q7DSPVector valb) + { + return(vmacc(Q<17,14>(0),vala,valb)); + }; + + __STATIC_FORCEINLINE Q<17,14> vreduce(const Q<17,14> sum) + { + return(sum); + }; + + +}; + + +#endif +#endif + + +/*! @} */ +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/Helium/basic.hpp b/dsppp/Include/dsppp/Helium/basic.hpp new file mode 100644 index 00000000..fd804230 --- /dev/null +++ b/dsppp/Include/dsppp/Helium/basic.hpp @@ -0,0 +1,314 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#include +#include +#include + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#endif + +/** \addtogroup ARCHALG + * \addtogroup HELIUMALG Helium specific algorithm + * \ingroup ARCHALG + * @{ + */ + +#if defined(ARM_MATH_MVEI) || defined(ARM_MATH_MVEF) +/** + * @brief Fill evaluator for Helium + * + * @param v Destination value + * @param[in] val Initialization value + * @param[in] l Vector length + * + * @tparam T Scalar datatype + * @tparam DST Destination datatype + * @tparam Check if has vector indexing + */ +template() && + IsVector::value && + SameElementType::value,bool>::type = true> +inline void _Fill(DST &v, + const T val, + const vector_length_t l, + const Helium* = nullptr) +{ + constexpr int nb_lanes = vector_traits::nb_lanes; + index_t i=0; + UNROLL_LOOP + for(i=0;i < l; i += nb_lanes) + { + v.vector_store_tail(i,l-i,inner::vconst_tail(val,inner::vctpq::mk(l-i))); + } +} + +/** + * @brief Fill2D evaluator for Helium + * + * @param v Destination value + * @param[in] val Initialization value + * @param[in] rows Number of rows + * @param[in] cols Number of columns + * + * @tparam T Scalar datatype + * @tparam DST Destination datatype + * @tparam Check only matrix indexing supported + */ +template() && + must_use_matrix_idx() && + 
SameElementType::value,bool>::type = true> +inline void _Fill2D(DST &v, + const T val, + const vector_length_t rows, + const vector_length_t cols, + const Helium* = nullptr) +{ + constexpr int nb_lanes = vector_traits::nb_lanes; + + // Outer unroll factor in case inner loop does not have + // enough arithmetic instructions. + // In future version this may be estimated from the + // complexity of the AST to evaluate + constexpr int U = 1; + index_t row=0; + + UNROLL_LOOP + for(; row <= rows-U;row += U) + { + + UNROLL_LOOP + for(index_t col=0; col < cols;col += nb_lanes) + { + for(int k=0;k::mk(cols-col))); + } + } + } + + for(; row < rows;row ++) + { + + UNROLL_LOOP + for(index_t col=0; col < cols;col += nb_lanes) + { + v.matrix_store_tail(row,col,cols-col,inner::vconst_tail(val,inner::vctpq::mk(cols-col))); + } + } +} + +/** + * @brief Eval function for Helium + * + * @param v Destination + * @param[in] other Expression to evaluate + * @param[in] l Vector length + * + * @tparam DA Destination datatype + * @tparam DB Expression datatype + * @tparam Check vector indexing and compatible vectors + */ +template() && + vector_idx_pair(),bool>::type = true> +inline void eval(DA &v, + const DB& other, + const vector_length_t l, + const Helium* = nullptr) +{ + using T = typename traits::Scalar; + constexpr int nb_lanes = vector_traits::nb_lanes; + + index_t i=0; + + UNROLL_LOOP + for(i=0;i < l; i += nb_lanes) + { + v.vector_store_tail(i,l-i,other.vector_op_tail(i,l-i)); + } +} + +/** + * @brief Eval2D function for Helium + * + * @param v Destination vector + * @param[in] other Expression to evaluate + * @param[in] rows Number of rows + * @param[in] cols Number of columns + * + * @tparam DA Destination datatype + * @tparam DB Source datatype + * @tparam Check has only matrix indexing + */ +template() && + must_use_matrix_idx_pair(),bool>::type = true> +inline void eval2D(DA &v, + const DB& other, + const vector_length_t rows, + const vector_length_t cols, + const Helium* = 
nullptr) +{ + using T = typename traits::Scalar; + constexpr int nb_lanes = vector_traits::nb_lanes; + // Attempt at computing the unrolling factor + // depending on the complexity of the AST + // (will have to rework this estimation) + constexpr int RU = 5 - Complexity::value; + constexpr int U = (RU <= 0) || (RU>=5) ? 1 : RU; + index_t row=0; + + UNROLL_LOOP + for(; row <= rows-U;row += U) + { + + UNROLL_LOOP + for(index_t col=0; col < cols;col += nb_lanes) + { + for(int k=0;k +void printt(const TupType& _tup, std::index_sequence) +{ + std::cout << "("; + (..., (std::cout << (I == 0? "" : ", ") << std::get(_tup))); + std::cout << ")\n"; +} + +/** + * @brief Print tuple + * + * @param[in] _tup Tuple + * + * @tparam T Datatype for tuple elements + */ +template +void printt (const std::tuple& _tup) +{ + printt(_tup, std::make_index_sequence()); +} + +/** + * @brief Dor product for Helium + * + * @param[in] a First expression + * @param[in] b Second expression + * @param[in] l Vector length + * + * @tparam DA First operand datatype + * @tparam DB Second operand datatype + * @tparam Check vector indexing and compatible vectors + * + * @return Dot product of vector expressions + */ +template() && + vector_idx_pair(),bool>::type = true> +inline DotResult _dot(const DA& a, + const DB& b, + const vector_length_t l, + const Helium* = nullptr) +{ + //using Res = DotResult; + // Vector scalar datatype + + using T = typename traits::Scalar; + using Temp = typename vector_traits::temp_accumulator; + + constexpr int nb_lanes = vector_traits::nb_lanes; + + Temp acc = vector_traits::temp_acc_zero(); + + UNROLL_LOOP + for(index_t i=0; i::mk(l-i)); + } + + return(inner::vreduce(acc)); +} + +/** + * @brief Swap operator for Helium + * + * @param a First opetand + * @param b Second operand + * @param[in] l Vector length + * + * @tparam DA First operand datatype + * @tparam DB Second operand datatype + * @tparam Check vector indexing and compatible vectors + */ +template() && + 
vector_idx_pair(),bool>::type = true> +inline void _swap(DA&& a, + DB&& b, + const vector_length_t l, + const Helium* = nullptr) +{ + using Scalar = typename ElementType::type; + using Vector = typename vector_traits::vector; + + constexpr int nb_lanes = vector_traits::type>::nb_lanes; + index_t i=0; + Vector tmpa,tmpb; + + UNROLL_LOOP + for(i=0;i < l; i += nb_lanes) + { + tmpa = a.vector_op_tail(i,l-i); + tmpb = b.vector_op_tail(i,l-i); + b.vector_store_tail(i,l-i,tmpa); + a.vector_store_tail(i,l-i,tmpb); + } +} +#endif + +/*! @} */ + diff --git a/dsppp/Include/dsppp/Helium/float.hpp b/dsppp/Include/dsppp/Helium/float.hpp new file mode 100644 index 00000000..817beb13 --- /dev/null +++ b/dsppp/Include/dsppp/Helium/float.hpp @@ -0,0 +1,870 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#endif + +/** \addtogroup HeliumNumber Helium specific number definitions + * \ingroup NUMBER + * @{ + * \addtogroup HeliumFloatNumber Float + * \ingroup HeliumNumber + * @{ + */ + +/****************** + * + * Helium + * + */ + +#if defined(ARM_MATH_MVEF) + +/* + + +Arch is deriving from Helium + +*/ +/** + * @brief Vector datatype for Helium + * + * @tparam arch Current architecture + */ +template +struct vector_traits::value>::type > +{ + //! Scalar datatype + typedef float type; + //! Storage datatype + typedef float storage_type; + //! Vector datatype + typedef float32x4_t vector; + //! Temp accumulator datatype (must be reduced to final scalar datatype) + typedef float32x4_t temp_accumulator; + //! Predicate datatype + typedef mve_pred16_t predicate_t; + //! Has vector instruction + static constexpr bool has_vector = true; + //! Is float + static constexpr bool is_float = true; + //! Is not fixed point + static constexpr bool is_fixed = false; + //! Has predicated loops + static constexpr bool has_predicate = true; + + //! 
Number of lanes + static constexpr int nb_lanes = 4; + + /** + * @brief Temp accumulator initialized to 0 + * + * @return Temp accumulator initialized to 0 + */ + static float32x4_t temp_acc_zero() + { + return(vdupq_n_f32(0.0f)); + } + + /** + * @brief Zero lane + * + * @return Value to wrte 0 into a lane + */ + static constexpr float zero_lane() {return 0.0f;}; + + // Useful in fixed point since lane value is an int and not a Q something + + /** + * @brief Lane value + * + * @param[in] x Lane vlue + * + * @return Value with scalar datatype + */ + static constexpr float lane_value(const float x) {return x;}; + +}; + + +/** + * \ingroup HeliumNumber + * @{ + */ +namespace inner { + + + /** + * @brief vctpq for this datatype + */ + template<> + struct vctpq { + /** + * @brief Make a predicate + * + * @param[in] v Number of iterations + * + * @return Predicate + */ + static mve_pred16_t mk(uint32_t v) + { + return(vctp32q(v)); + }; + }; + + /** + * @brief Vector constant + * + * @param[in] v Constant value + * + * @return Vector initialized with constant in each lane + */ + __STATIC_FORCEINLINE float32x4_t vconst(const float v) + { + return(vdupq_n_f32(v)); + } + + /** + * @brief Vector constant with tail + * + * @param[in] v Constant value + * @param[in] p0 Prddicate + * + * @return Vector initialized with constant in some lanes + * dependign on the predicate + */ + __STATIC_FORCEINLINE float32x4_t vconst_tail(const float v, + const mve_pred16_t p0) + { + return(vdupq_x_n_f32(v,p0)); + } + + /** + * @brief Vector negate + * + * @param[in] a Vector value to negate + * + * @return Negated value + */ + __STATIC_FORCEINLINE float32x4_t vneg(const float32x4_t a) + { + return(vnegq(a)); + }; + + /** + * @brief Vector negate with tail + * + * @param[in] a Value + * @param[in] p0 Predicate + * + * @return Negated value + */ + __STATIC_FORCEINLINE float32x4_t vneg(const float32x4_t a, + const mve_pred16_t p0) + { + return(vnegq_x(a,p0)); + }; + + /** + * @brief Vector + 
Vector + * + * @param[in] a First operand + * @param[in] b Second operand + * + * @return a + b + */ + __STATIC_FORCEINLINE float32x4_t vadd(const float32x4_t a,const float32x4_t b) + { + return(vaddq(a,b)); + }; + + /** + * @brief Vector + Scalar + * + * @param[in] a Vector + * @param[in] b Scalar + * + * @return a + b + */ + __STATIC_FORCEINLINE float32x4_t vadd(const float32x4_t a,const float b) + { + return(vaddq_n_f32(a,b)); + }; + + /** + * @brief Scalar + Vector + * + * @param[in] a Scalar + * @param[in] b Vector + * + * @return a + b + */ + __STATIC_FORCEINLINE float32x4_t vadd(const float a,const float32x4_t b) + { + return(vaddq_n_f32(b,a)); + }; + + /** + * @brief Vector + Vector with tail + * + * @param[in] a Vector + * @param[in] b Vector + * @param[in] p0 Predicated + * + * @return a + b with tail predicate + */ + __STATIC_FORCEINLINE float32x4_t vadd(const float32x4_t a,const float32x4_t b, + const mve_pred16_t p0) + { + return(vaddq_x(a,b,p0)); + }; + + + /** + * @brief Vector + scalar with tail + * + * @param[in] a Vector + * @param[in] b Scalar + * @param[in] p0 Predicate + * + * @return a + b with tail predicate + */ + __STATIC_FORCEINLINE float32x4_t vadd(const float32x4_t a,const float b, + const mve_pred16_t p0) + { + return(vaddq_x_n_f32(a,b,p0)); + }; + + /** + * @brief Scalar + vector with tail predicate + * + * @param[in] a Scalar + * @param[in] b Vector + * @param[in] p0 Predicate + * + * @return a + b with tail predicate + */ + __STATIC_FORCEINLINE float32x4_t vadd(const float a,const float32x4_t b, + const mve_pred16_t p0) + { + return(vaddq_x_n_f32(b,a,p0)); + }; + + /** + * @brief Vector - Vector + * + * @param[in] a Vector + * @param[in] b Vector + * + * @return a - b + */ + __STATIC_FORCEINLINE float32x4_t vsub(const float32x4_t a,const float32x4_t b) + { + return(vsubq(a,b)); + }; + + /** + * @brief Vector - Scalar + * + * @param[in] a Vector + * @param[in] b Scalar + * + * @return a - b + */ + __STATIC_FORCEINLINE float32x4_t 
vsub(const float32x4_t a,const float b) + { + return(vsubq_n_f32(a,b)); + }; + + /** + * @brief Scalar - Vector + * + * @param[in] a Scalar + * @param[in] b Vector + * + * @return a - b + */ + __STATIC_FORCEINLINE float32x4_t vsub(const float a,const float32x4_t b) + { + return(vsubq_n_f32(b,a)); + }; + + /** + * @brief Vector - Vector with predicate + * + * @param[in] a Vector + * @param[in] b Vector + * @param[in] p0 Predicate + * + * @return a - b + */ + __STATIC_FORCEINLINE float32x4_t vsub(const float32x4_t a,const float32x4_t b, + const mve_pred16_t p0) + { + return(vsubq_x(a,b,p0)); + }; + + /** + * @brief Vector - Scalar with predicate + * + * @param[in] a Vector + * @param[in] b Scalar + * @param[in] p0 predicate + * + * @return a - b with predicate + */ + __STATIC_FORCEINLINE float32x4_t vsub(const float32x4_t a,const float b, + const mve_pred16_t p0) + { + return(vsubq_x_n_f32(a,b,p0)); + }; + + /** + * @brief Scalar - Vector with predicate + * + * @param[in] a Scalar + * @param[in] b Vector + * @param[in] p0 predicate + * + * @return a - b with predicate + */ + __STATIC_FORCEINLINE float32x4_t vsub(const float a,const float32x4_t b, + const mve_pred16_t p0) + { + return(vsubq_x_n_f32(b,a,p0)); + }; + + /** + * @brief Vector * Vector + * + * @param[in] a Vector + * @param[in] b Vector + * + * @return a * b + */ + __STATIC_FORCEINLINE float32x4_t vmul(const float32x4_t a,const float32x4_t b) + { + return(vmulq(a,b)); + }; + + /** + * @brief Vector * Scalar + * + * @param[in] a Vector + * @param[in] b Scalar + * + * @return a * b + */ + __STATIC_FORCEINLINE float32x4_t vmul(const float32x4_t a,const float b) + { + return(vmulq_n_f32(a,b)); + }; + + /** + * @brief Scalar * Vector + * + * @param[in] a Scalar + * @param[in] b Vector + * + * @return a * b + */ + __STATIC_FORCEINLINE float32x4_t vmul(const float a,const float32x4_t b) + { + return(vmulq_n_f32(b,a)); + }; + + /** + * @brief Vector * Vector with predicate + * + * @param[in] a Vector + * 
@param[in] b Vector + * @param[in] p0 Predicate + * + * @return a * b + */ + __STATIC_FORCEINLINE float32x4_t vmul(const float32x4_t a,const float32x4_t b, + const mve_pred16_t p0) + { + return(vmulq_x(a,b,p0)); + }; + + /** + * @brief Vector * Scalar with predicate + * + * @param[in] a Vector + * @param[in] b Scalar + * @param[in] p0 Predicate + * + * @return a * b with predicate + */ + __STATIC_FORCEINLINE float32x4_t vmul(const float32x4_t a,const float b, + const mve_pred16_t p0) + { + return(vmulq_x_n_f32(a,b,p0)); + }; + + /** + * @brief Scalar * Vector with predicate + * + * @param[in] a Scalar + * @param[in] b Vector + * @param[in] p0 Predicate + * + * @return a * b with predicate + */ + __STATIC_FORCEINLINE float32x4_t vmul(const float a,const float32x4_t b, + const mve_pred16_t p0) + { + return(vmulq_x_n_f32(b,a,p0)); + }; + + /** + * @brief Multiply accumulate (Vector * Vector) + * + * @param[in] acc Accumulator + * @param[in] a Vector + * @param[in] b Vector + * + * @return acc + a * b + */ + __STATIC_FORCEINLINE float32x4_t vmacc(const float32x4_t acc,const float32x4_t a,const float32x4_t b) + { + return(vfmaq(acc,a,b)); + }; + +/** + * @brief Multiply accumulate (Vector * Scalar) + * + * @param[in] acc Accumulator + * @param[in] a Vector + * @param[in] b Scalar + * + * @return acc + a * b + */ + __STATIC_FORCEINLINE float32x4_t vmacc(const float32x4_t acc,const float32x4_t a,const float_t b) + { + return(vfmaq(acc,a,b)); + }; + + /** + * @brief Multiply accumulate with predicate (Vector * Vector) + * + * @param[in] acc Accumulator + * @param[in] a Vector + * @param[in] b Vector + * @param[in] p0 Predicate + * + * @return acc + a*b with predicate + */ + __STATIC_FORCEINLINE float32x4_t vmacc(const float32x4_t acc,const float32x4_t a,const float32x4_t b, + const mve_pred16_t p0) + { + return(vfmaq_m(acc,a,b,p0)); + }; + + + + /** + * @brief Vector reduce + * + * @param[in] in Vector + * + * @return Reduced scalar value + */ + __STATIC_FORCEINLINE float 
vreduce(const float32x4_t in) + { + float acc = vgetq_lane(in, 0) + vgetq_lane(in, 1) + + vgetq_lane(in, 2) + vgetq_lane(in, 3); + return(acc); + }; + + + + /** + * @brief Vector load with stride + * + * @param[in] p Load address + * + * @tparam S Stride + * @tparam Check stride value + * + * @return Loaded vector with stride + */ + template::type = true> + inline float32x4_t vload1(const float32_t *p) + { + return(vld1q(p)); + }; + + + template1),bool>::type = true> + inline float32x4_t vload1(const float32_t *p) + { + constexpr uint32x4_t offset={0*S,1*S,2*S,3*S}; + return(vldrwq_gather_shifted_offset_f32(p,offset)); + }; + + + // With dynamic stride + + /** + * @brief Vector load with dynamic stride + * + * @param[in] p Load address + * @param[in] stride Stride value + * + * @return Loaded vector with stride + */ + inline float32x4_t vload1(const float32_t *p,const index_t stride) + { + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,stride); + return(vldrwq_gather_shifted_offset_f32(p,offset)); + }; + + + /** + * @brief Vector load with stride and predicate + * + * @param[in] p Load address + * @param[in] nb Number of remaining loop samples + * @param[in] p0 Predicate for remaining loop samples + * + * @tparam S Stride + * @tparam Check stride value + * + * @return Loaded vector with stride and loop predication + */ + template::type = true> + inline float32x4_t vload1_z(const float32_t *p,const std::size_t nb,const mve_pred16_t p0) + { + (void)nb; + return(vld1q_z(p,p0)); + }; + + + template1),bool>::type = true> + inline float32x4_t vload1_z(const float32_t *p,const std::size_t nb,const mve_pred16_t p0) + { + (void)nb; + //uint32x4_t offset={0,1,2,3}; + //uint32x4_t offset = vidupq_u32((uint32_t)0,1); + //offset = vmulq_n_u32(offset,S); + constexpr uint32x4_t offset={0*S,1*S,2*S,3*S}; + return(vldrwq_gather_shifted_offset_z_f32(p,offset,p0)); + }; + + // With dynamic stride + + /** + * @brief Vector load with dynamic stride and 
loop predication + * + * @param[in] p Load address + * @param[in] stride Stride value + * @param[in] nb Number of remaining loop samples + * @param[in] p0 Predicate for remaining loop samples + * + * @return Loaded vector with stride and loop predicate + */ + inline float32x4_t vload1_z(const float32_t *p,const index_t stride,const std::size_t nb,const mve_pred16_t p0) + { + (void)nb; + //uint32x4_t offset={0,1,2,3}; + //uint32x4_t offset = vidupq_u32((uint32_t)0,1); + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,stride); + return(vldrwq_gather_shifted_offset_z_f32(p,offset,p0)); + }; + + /* Generalized stride */ + + /** + * @brief Load with generalized stride (gather load) + * + * @tparam S List of offsets known at built time + */ + template + struct vload1_gen_stride + { + /** + * @brief Load with generalized stride + * + * @param[in] p Load address + * + * @return Gather load + */ + static float32x4_t run(const float32_t *p) + { + constexpr uint32x4_t offset={S...}; + return(vldrwq_gather_shifted_offset_f32(p,offset)); + }; + }; + + /** + * @brief Load with generalized stride specialized for <0,1,2,3> + */ + template<> + struct vload1_gen_stride<0,1,2,3> + { + /** + * @brief Load with generalized stride + * + * @param[in] p Load address + * + * @return Loaded vector + */ + static float32x4_t run(const float32_t *p) + { + return(vld1q(p)); + }; + }; + + /* Generalized stride */ + + /** + * @brief Load with generalized stride (gather load) and tail predicate + * + * @tparam S List of offsets known at built time + */ + template + struct vload1_gen_stride_z + { + /** + * @brief Load + * + * @param[in] p Load address + * @param[in] nb Number of remaining samples in loop + * @param[in] p0 Predicate for remaining samples + * + * @return Gather load with predicate + */ + static float32x4_t run(const float32_t *p,const std::size_t nb,const mve_pred16_t p0) + { + constexpr uint32x4_t offset={S...}; + (void)nb; + 
return(vldrwq_gather_shifted_offset_z_f32(p,offset,p0)); + }; + }; + + /** + * @brief Load with generalized stride (gather load) and tail predicate specialized for <0,1,2,3> + * + * @tparam S List of offsets known at built time + */ + template<> + struct vload1_gen_stride_z<0,1,2,3> + { + /** + * @brief Gather load with predicated specialized for <0,1,2,3> + * + * @param[in] p Load address + * @param[in] nb Number of remaining samples in the loop + * @param[in] p0 Predicate for samples in the loop + * + * @return Gather load + */ + static float32x4_t run(const float32_t *p,const std::size_t nb,const mve_pred16_t p0) + { + (void)nb; + return(vld1q_z(p,p0)); + }; + }; + + /** + * @brief Store with stride + * + * @param p Store address + * @param[in] val Value to store + * + * @tparam S Stride + * @tparam Check stride value + */ + template::type = true> + inline void vstore1(float32_t *p,const float32x4_t val) + { + vst1q(p,val); + }; + + template1),bool>::type = true> + inline void vstore1(float32_t *p,const float32x4_t val) + { + //uint32x4_t offset={0,1,2,3}; + //uint32x4_t offset = vidupq_u32((uint32_t)0,1); + //offset = vmulq_n_u32(offset,S); + constexpr uint32x4_t offset={0*S,1*S,2*S,3*S}; + vstrwq_scatter_shifted_offset_f32(p,offset,val); + }; + + // with dynamic stride + + /** + * @brief Store with dynamic stride + * + * @param p Store address + * @param[in] stride Stride value + * @param[in] val Value to store + */ + inline void vstore1(float32_t *p,const index_t stride,const float32x4_t val) + { + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,stride); + vstrwq_scatter_shifted_offset_f32(p,offset,val); + }; + + + /** + * @brief Store with stride and tail predicate + * + * @param p Store address + * @param[in] val Value to store + * @param[in] nb Number of remaining loop iterations + * @param[in] p0 Predicate for loop + * + * @tparam S Stride + * @tparam Check stride value + */ + template::type = true> + inline void 
vstore1_z(float32_t *p,const float32x4_t val,const std::size_t nb,const mve_pred16_t p0) + { + (void)nb; + vstrwq_p(p,val,p0); + }; + + template1),bool>::type = true> + inline void vstore1_z(float32_t *p,const float32x4_t val,const std::size_t nb,const mve_pred16_t p0) + { + (void)nb; + //uint32x4_t offset={0,1,2,3}; + //uint32x4_t offset = vidupq_u32((uint32_t)0,1); + //offset = vmulq_n_u32(offset,S); + constexpr uint32x4_t offset={0*S,1*S,2*S,3*S}; + vstrwq_scatter_shifted_offset_p_f32(p,offset,val,p0); + }; + + // with dynamic stride + + /** + * @brief Store with dynamic stride + * + * @param p Store address + * @param[in] stride Stride value + * @param[in] val Value to store + * @param[in] nb Number of remaining loops + * @param[in] p0 Predicate for loop + */ + inline void vstore1_z(float32_t *p,const index_t stride,const float32x4_t val,const std::size_t nb,const mve_pred16_t p0) + { + (void)nb; + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,stride); + vstrwq_scatter_shifted_offset_p_f32(p,offset,val,p0); + }; + + // Generalized stride + + /** + * @brief Generalized store with strides + * + * @tparam S Stride values known at built time + */ + template + struct vstore1_gen_stride + { + /** + * @brief Scatter store + * + * @param p Store address + * @param[in] val VAlue to store + */ + static void run(float32_t *p,const float32x4_t val) + { + constexpr uint32x4_t offset={S...}; + vstrwq_scatter_shifted_offset_f32(p,offset,val); + }; + }; + + /** + * @brief Generalized store with stride (Specialized for <0,1,2,3>) + */ + template<> + struct vstore1_gen_stride<0,1,2,3> + { + /** + * @brief Scatter store + * + * @param p Store address + * @param[in] val Value to store + */ + static void run(float32_t *p,const float32x4_t val) + { + vst1q(p,val); + }; + }; + + /** + * @brief Store with generalized strides and tail predicate + * + * @tparam S Strides values known at built time + */ + template + struct vstore1_gen_stride_z + { + /** + * 
@brief Scatter store with tail predicate + * + * @param p Store address + * @param[in] val Value to store + * @param[in] nb Remaining number of loops + * @param[in] p0 Loop predicate + */ + static void vstore1_z(float32_t *p,const float32x4_t val,const std::size_t nb,const mve_pred16_t p0) + { + constexpr uint32x4_t offset={S...}; + (void)nb; + vstrwq_scatter_shifted_offset_p_f32(p,offset,val,p0); + } + }; + + /** + * @brief Scatter store with tail predicate (specialized for <0,1,2,3>) + */ + template<> + struct vstore1_gen_stride_z<0,1,2,3> + { + /** + * @brief Scatter store with tail predicate + * + * @param p Store address + * @param[in] val Value to store + * @param[in] nb Number of remaining loops + * @param[in] p0 Loop predicate + */ + static void vstore1_z(float32_t *p,const float32x4_t val,const std::size_t nb,const mve_pred16_t p0) + { + (void)nb; + vstrwq_p(p,val,p0); + } + + }; + + + +}; +/*! @} */ + +#endif + +/*! @} */ +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/Helium/half.hpp b/dsppp/Include/dsppp/Helium/half.hpp new file mode 100644 index 00000000..bb678c1a --- /dev/null +++ b/dsppp/Include/dsppp/Helium/half.hpp @@ -0,0 +1,652 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#define ARM_MATH_MVE_FLOAT16 +#endif + +/** \addtogroup HeliumNumber Helium specific number definitions + * \ingroup NUMBER + * @{ + * \addtogroup HeliumHalfNumber Half + * \ingroup HeliumNumber + * @{ + */ + +#if defined(ARM_MATH_MVE_FLOAT16) + +/** + * @brief Vector features for f16 on Helium + * + * @tparam arch Current architecture + */ +template +struct vector_traits::value>::type> +{ + //! Scalar datatype + typedef float16_t type; + //! Storage datatype + typedef float16_t storage_type; + //! Vector datatype + typedef float16x8_t vector; + //! Temp accumulator datatype + typedef float16x8_t temp_accumulator; + //! Predicate datatype + typedef mve_pred16_t predicate_t; + + //! 
Has vector instructions + static constexpr bool has_vector = true; + //! Is float + static constexpr bool is_float = true; + //! Is not fixed point + static constexpr bool is_fixed = false; + //! Has predicated loop + static constexpr bool has_predicate = true; + + //! Number of lanes + static constexpr int nb_lanes = 8; + + /** + * @brief Vector of 0 + * + * @return Vector of 0 + */ + static float16x8_t temp_acc_zero() + { + return(vdupq_n_f16(0.0f)); + } + + /** + * @brief Value to write 0 in a lane + * + * @return Value to write 0 in a lane + * + * f16 suffix not supported in C++ + */ + static constexpr float16_t zero_lane() {return 0.0f;}; + // Useful in fixed point since lane value is an int and not a Q something + + /** + * @brief Convert from lane value + * + * @param[in] x Lane value + * + * @return Lane value with current datatype + */ + static constexpr float16_t lane_value(const float16_t x) {return x;}; + +}; + + +/** + * \ingroup HeliumNumber + */ +namespace inner { + + + /** + * @brief vctpq for Helium and f16 + */ + template<> + struct vctpq{ + /** + * @brief Make predicate + * + * @param[in] v Remaining iterations + * + * @return Predicate + */ + static mve_pred16_t mk(uint32_t v) + + { + return(vctp16q(v)); + }; + }; + + /** + * @brief Vector const + * + * @param[in] v Initialization value + * + * @return Vector of const + */ + __STATIC_FORCEINLINE float16x8_t vconst(float16_t v) + { + return(vdupq_n_f16(v)); + } + + /** + * @brief Vector of const with tail predicate + * + * @param[in] v The initialization parameter + * @param[in] p0 The predicate + * + * @return The initialized vector with const and predicate + */ + __STATIC_FORCEINLINE float16x8_t vconst_tail(const float16_t v, + const mve_pred16_t p0) + { + return(vdupq_x_n_f16(v,p0)); + } + + /** + * @brief Vector negate + * + * @param[in] a Vector + * + * @return Negate of vector + */ + __STATIC_FORCEINLINE float16x8_t vneg(const float16x8_t a) + { + return(vnegq(a)); + }; + + /** + * @brief 
Vector negate with tail predicate + * + * @param[in] a Vector + * @param[in] p0 Predicate + * + * @return Negate of vector with tail predicate + */ + __STATIC_FORCEINLINE float16x8_t vneg(const float16x8_t a, + const mve_pred16_t p0) + { + return(vnegq_x(a,p0)); + }; + + /* + + ADD + + */ + + /** + * @brief Vector + Vector + * + * @param[in] a Vector + * @param[in] b Vector + * + * @return a + b + */ + __STATIC_FORCEINLINE float16x8_t vadd(const float16x8_t a, + const float16x8_t b) + { + return(vaddq(a,b)); + }; + + /** + * @brief Vector + Scalar + * + * @param[in] a Vector + * @param[in] b Scalar + * + * @return a + b + */ + __STATIC_FORCEINLINE float16x8_t vadd(const float16x8_t a, + const float16_t b) + { + return(vaddq_n_f16(a,b)); + }; + + /** + * @brief Scalar + Vector + * + * @param[in] a Scalar + * @param[in] b Vector + * + * @return a + b + */ + __STATIC_FORCEINLINE float16x8_t vadd(const float16_t a, + const float16x8_t b) + { + return(vaddq_n_f16(b,a)); + }; + + /** + * @brief Vector + Vector with tail predicate + * + * @param[in] a Vector + * @param[in] b Vector + * @param[in] p0 predicate + * + * @return a + b with tail predicate + */ + __STATIC_FORCEINLINE float16x8_t vadd(const float16x8_t a, + const float16x8_t b, + const mve_pred16_t p0) + { + return(vaddq_x(a,b,p0)); + }; + + /** + * @brief Vector + Scalar with tail predicate + * + * @param[in] a Vector + * @param[in] b Scalar + * @param[in] p0 Predicate + * + * @return a + b with tail predicate + */ + __STATIC_FORCEINLINE float16x8_t vadd(const float16x8_t a, + const float16_t b, + const mve_pred16_t p0) + { + return(vaddq_x_n_f16(a,b,p0)); + }; + + /** + * @brief Scalar + Vector with tail predicate + * + * @param[in] a Scalar + * @param[in] b Vector + * @param[in] p0 Predicate + * + * @return a + b with tail predicate + */ + __STATIC_FORCEINLINE float16x8_t vadd(const float16_t a, + const float16x8_t b, + const mve_pred16_t p0) + { + return(vaddq_x_n_f16(b,a,p0)); + }; + + /* + + SUB + + */ + + 
__STATIC_FORCEINLINE float16x8_t vsub(const float16x8_t a, + const float16x8_t b) + { + return(vsubq(a,b)); + }; + + __STATIC_FORCEINLINE float16x8_t vsub(const float16x8_t a, + const float16_t b) + { + return(vsubq_n_f16(a,b)); + }; + + __STATIC_FORCEINLINE float16x8_t vsub(const float16_t a, + const float16x8_t b) + { + return(vsubq_n_f16(b,a)); + }; + + __STATIC_FORCEINLINE float16x8_t vsub(const float16x8_t a, + const float16x8_t b, + const mve_pred16_t p0) + { + return(vsubq_x(a,b,p0)); + }; + + __STATIC_FORCEINLINE float16x8_t vsub(const float16x8_t a, + const float16_t b, + const mve_pred16_t p0) + { + return(vsubq_x_n_f16(a,b,p0)); + }; + + __STATIC_FORCEINLINE float16x8_t vsub(const float16_t a, + const float16x8_t b, + const mve_pred16_t p0) + { + return(vsubq_x_n_f16(b,a,p0)); + }; + + /* + + MUL + + */ + + __STATIC_FORCEINLINE float16x8_t vmul(const float16x8_t a, + const float16x8_t b) + { + return(vmulq(a,b)); + }; + + __STATIC_FORCEINLINE float16x8_t vmul(const float16x8_t a, + const float16_t b) + { + return(vmulq_n_f16(a,b)); + }; + + __STATIC_FORCEINLINE float16x8_t vmul(const float16_t a, + const float16x8_t b) + { + return(vmulq_n_f16(b,a)); + }; + + __STATIC_FORCEINLINE float16x8_t vmul(const float16x8_t a, + const float16x8_t b, + const mve_pred16_t p0) + { + return(vmulq_x(a,b,p0)); + }; + + __STATIC_FORCEINLINE float16x8_t vmul(const float16x8_t a, + const float16_t b, + const mve_pred16_t p0) + { + return(vmulq_x_n_f16(a,b,p0)); + }; + + __STATIC_FORCEINLINE float16x8_t vmul(const float16_t a, + const float16x8_t b, + const mve_pred16_t p0) + { + return(vmulq_x_n_f16(b,a,p0)); + }; + + /* + + vmacc + + */ + + __STATIC_FORCEINLINE float16x8_t vmacc(const float16x8_t acc, + const float16x8_t a, + const float16x8_t b) + { + return(vfmaq(acc,a,b)); + }; + + __STATIC_FORCEINLINE float16x8_t vmacc(const float16x8_t acc, + const float16x8_t a, + const float16_t b) + { + return(vfmaq(acc,a,b)); + }; + + __STATIC_FORCEINLINE float16x8_t vmacc(const 
float16x8_t acc, + const float16x8_t a, + const float16x8_t b, + const mve_pred16_t p0) + { + return(vfmaq_m(acc,a,b,p0)); + }; + + + + __STATIC_FORCEINLINE float16_t vreduce(float16x8_t in) + { + float16x8_t tmpVec; + _Float16 acc; + + tmpVec = (float16x8_t) vrev32q_s16((int16x8_t) in); + in = vaddq_f16(tmpVec, in); + tmpVec = (float16x8_t) vrev64q_s32((int32x4_t) in); + in = vaddq_f16(tmpVec, in); + acc = (_Float16)vgetq_lane_f16(in, 0) + (_Float16)vgetq_lane_f16(in, 4); + + return acc; + }; + + /* + + Load + + */ + + template::type = true> + inline float16x8_t vload1(const float16_t *p) + { + return(vld1q(p)); + }; + + template1) && (S<=65535),bool>::type = true> + inline float16x8_t vload1(const float16_t *p) + { + constexpr uint16x8_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S}; + return(vldrhq_gather_shifted_offset_f16(p,offset)); + }; + + template65535),bool>::type = true> + inline float16x8_t vload1(const float16_t *p) + { + float16x8_t res; + for(std::size_t i=0;i<8;i++) + { + res[i] = *p; + p += S; + } + + return(res); + }; + + // With dynamic stride + inline float16x8_t vload1(const float16_t *p,const index_t stride) + { + if (stride <= 65535) + { + uint16x8_t offset = vidupq_u16((uint32_t)0,1); + offset = vmulq_n_u16(offset,stride); + return(vldrhq_gather_shifted_offset_f16(p,offset)); + } + else + { + float16x8_t res; + for(std::size_t i=0;i<8;i++) + { + res[i] = *p; + p += stride; + } + return(res); + } + }; + + + + template::type = true> + inline float16x8_t vload1_z(const float16_t *p, + const std::size_t nb, + const mve_pred16_t p0) + { + (void)nb; + return(vld1q_z(p,p0)); + }; + + template1)&& (S<=65535),bool>::type = true> + inline float16x8_t vload1_z(const float16_t *p, + const std::size_t nb, + const mve_pred16_t p0) + { + (void)nb; + constexpr uint16x8_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S}; + return(vldrhq_gather_shifted_offset_z_f16(p,offset,p0)); + }; + + template65535),bool>::type = true> + inline float16x8_t vload1_z(const float16_t 
*p,std::size_t nb,mve_pred16_t p0) + { + (void)p0; + float16x8_t res; + std::size_t i=0; + for(;i::type = true> + inline void vstore1(float16_t *p,const float16x8_t val) + { + vst1q(p,val); + }; + + template1) && (S<=65535),bool>::type = true> + inline void vstore1(float16_t *p,const float16x8_t val) + { + constexpr uint16x8_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S}; + vstrhq_scatter_shifted_offset_f16(p,offset,val); + }; + + template65535),bool>::type = true> + inline void vstore1(float16_t *p,const float16x8_t val) + { + for(std::size_t i=0;i<8;i++) + { + *p = val[i]; + p += S; + } + + }; + + // dynamic stride + inline void vstore1(float16_t *p, + const index_t stride, + const float16x8_t val) + { + if (stride <=65535) + { + uint16x8_t offset = vidupq_u16((uint32_t)0,1); + offset = vmulq_n_u16(offset,stride); + vstrhq_scatter_shifted_offset_f16(p,offset,val); + } + else + { + for(std::size_t i=0;i<8;i++) + { + *p = val[i]; + p += stride; + } + } + } + + template::type = true> + inline void vstore1_z(float16_t *p, + const float16x8_t val, + std::size_t nb, + mve_pred16_t p0) + { + (void)nb; + vstrhq_p(p,val,p0); + }; + + template1) && (S<=65535),bool>::type = true> + inline void vstore1_z(float16_t *p, + const float16x8_t val, + std::size_t nb, + mve_pred16_t p0) + { + (void)nb; + + constexpr uint16x8_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S}; + vstrhq_scatter_shifted_offset_p_f16(p,offset,val,p0); + }; + + + template65535),bool>::type = true> + inline void vstore1_z(float16_t *p, + const float16x8_t val, + std::size_t nb, + mve_pred16_t p0) + { + (void)p0; + for(std::size_t i=0;i +inline void _dot_m_v(RES &res, + const M&m,const V&v, + const Helium* = nullptr) +{ + + const vector_length_t nb_rows=m.rows(); + constexpr int U = 4; + + index_t row=0; + + DISABLE_LOOP_UNROLL + for(; row<=nb_rows-U; row += U) + { + results([&res,&row](index_t k){return &res[row+k];}) = + inner::from_accumulator(dot(unroll( + [&row,&m](index_t k){return m.row(row+k);}), + 
replicate(v) + )); + } + + switch (nb_rows-row) + { + case 3: + results<3>([&res,row](index_t k){return &res[row+k];}) = + inner::from_accumulator(dot(unroll<3>( + [row,&m](index_t k){return m.row(row+k);}), + replicate<3>(v) + )); + break; + case 2: + results<2>([&res,row](index_t k){return &res[row+k];}) = + inner::from_accumulator(dot(unroll<2>( + [row,&m](index_t k){return m.row(row+k);}), + replicate<2>(v) + )); + break; + case 1: + res[row] = inner::from_accumulator(dot(m.row(row),v)); + break; + } + +} + +#define MATRIX_DIM2 2 +#define MATRIX_DIM3 3 +#define MATRIX_DIM4 4 + +#if defined(ARM_MATH_MVEI) + +/* Fixed point specific cases*/ +#include "matrix_multiply_fixed.hpp" + +#endif + +#if defined(ARM_MATH_MVEF) + +/* Datatype specific cases*/ +#include "matrix_multiply_f16.hpp" +#include "matrix_multiply_f32.hpp" + +/* Generic float */ +template() && + number_traits::Scalar>::is_float,bool>::type = true> +__STATIC_INLINE void _dot_m_m(const MA&pSrcA,const MB&pSrcB, + RES &&pDst, + const Helium* = nullptr) + { + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + constexpr int nb_lanes = vector_traits::nb_lanes; + + T *pInB = pSrcB.ptr(); /* input data matrix pointer B */ + T *pInA = pSrcA.ptr(); /* input data matrix pointer A */ + T *pOut = pDst.ptr(); /* output data matrix pointer */ + int numRowsA = pSrcA.rows(); /* number of rows of input matrix A */ + int numColsB = pSrcB.columns(); /* number of columns of input matrix B */ + int numColsA = pSrcA.columns(); /* number of columns of input matrix A */ + uint32_t blkCnt; /* loop counters */ + uint32_t i; + + { + /* small squared matrix specialized routines */ + if(numRowsA == numColsB && numColsB == numColsA) { + if (numRowsA == 1) + { + pDst(0,0)= pSrcA(0,0) * pSrcB(0,0); + return; + } + else if(numRowsA == 2) + return _arm_mat_mult_2x2_mve(pSrcA, pSrcB, std::forward(pDst)); + else if(numRowsA == 3) + return 
_arm_mat_mult_3x3_mve(pSrcA, pSrcB, std::forward(pDst)); + else if(numRowsA == 4) + return _arm_mat_mult_4x4_mve(pSrcA, pSrcB, std::forward(pDst)); + } + + /* main loop process 4 rows */ + i = numRowsA >> 2; + while (i > 0U) + { + T *pInA0, *pInA1, *pInA2, *pInA3; + T *pInB0; + T *pOut0, *pOut1, *pOut2, *pOut3; + ACC vecMac0, vecMac1, vecMac2, vecMac3; + VEC vecInB; + + /* pointers to 4 consecutive output rows */ + pOut0 = pOut; + pOut1 = pOut0 + pDst.stride(); + pOut2 = pOut1 + pDst.stride(); + pOut3 = pOut2 + pDst.stride(); + pInB0 = pInB; + + uint32_t k = numColsB / nb_lanes; + while (k > 0U) + { + /* pointers to 4 consecutive Matrix A rows */ + pInA0 = pInA; + pInA1 = pInA0 + pSrcA.stride(); + pInA2 = pInA1 + pSrcA.stride(); + pInA3 = pInA2 + pSrcA.stride(); + + vecMac0 = vector_traits::temp_acc_zero(); + vecMac1 = vector_traits::temp_acc_zero(); + vecMac2 = vector_traits::temp_acc_zero(); + vecMac3 = vector_traits::temp_acc_zero(); + + blkCnt = numColsA; + + while (blkCnt > 0U) + { + /* + * load {bi,4n+0, bi,4n+1, bi,4n+2, bi,4n+3} + */ + vecInB = inner::vload1<1>(pInB0); /* vldrwq_f32(pInB0, 0); */ + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + vecMac1 = inner::vmacc(vecMac1, vecInB, *pInA1++); + vecMac2 = inner::vmacc(vecMac2, vecInB, *pInA2++); + vecMac3 = inner::vmacc(vecMac3, vecInB, *pInA3++); + + pInB0 = pInB0 + pSrcB.stride(); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + + /* Store the results (4 x 4 block) in the destination buffer */ + inner::vstore1<1>(pOut0, vecMac0); + pOut0 += nb_lanes; + inner::vstore1<1>(pOut1, vecMac1); + pOut1 += nb_lanes; + inner::vstore1<1>(pOut2, vecMac2); + pOut2 += nb_lanes; + inner::vstore1<1>(pOut3, vecMac3); + pOut3 += nb_lanes; + + /* + * rewind + */ + pInB0 -= (pSrcB.stride() * numColsA) - nb_lanes; + k--; + } + + int colBLeft = numColsB & (nb_lanes - 1); + if (colBLeft) + { + pInA0 = pInA; + pInA1 = pInA0 + pSrcA.stride(); + pInA2 = pInA1 + pSrcA.stride(); + pInA3 = pInA2 + 
pSrcA.stride(); + + mve_pred16_t p0 = inner::vctpq::mk(colBLeft); + + vecMac0 = vector_traits::temp_acc_zero(); + vecMac1 = vector_traits::temp_acc_zero(); + vecMac2 = vector_traits::temp_acc_zero(); + vecMac3 = vector_traits::temp_acc_zero(); + + blkCnt = numColsA; + + while (blkCnt > 0U) + { + /* + * load {bi,4n+0, bi,4n+1, bi,4n+2, bi,4n+3} + */ + vecInB = inner::vload1_z<1>(pInB0, colBLeft,p0); + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + vecMac1 = inner::vmacc(vecMac1, vecInB, *pInA1++); + vecMac2 = inner::vmacc(vecMac2, vecInB, *pInA2++); + vecMac3 = inner::vmacc(vecMac3, vecInB, *pInA3++); + + pInB0 = pInB0 + pSrcB.stride(); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + + /* Store the results (4 x colBLeft block) in the destination buffer */ + inner::vstore1_z<1>(pOut0, vecMac0, colBLeft,p0); + inner::vstore1_z<1>(pOut1, vecMac1, colBLeft,p0); + inner::vstore1_z<1>(pOut2, vecMac2, colBLeft,p0); + inner::vstore1_z<1>(pOut3, vecMac3, colBLeft,p0); + } + + /* move to next rows */ + pInA += 4 * pSrcA.stride(); + pOut += 4 * pDst.stride(); + i--; + } + + /* + * non multiple of 4 rows for Matrix A + * process single row + */ + if (numRowsA & 3) + { + i = numRowsA & 3; + while (i > 0U) + { + T *pInA0; + T *pInB0; + T *pOut0; + VEC vecInB; + ACC vecMac0; + + pOut0 = pOut; + pInB0 = pInB; + + uint32_t k = numColsB / nb_lanes; + while (k > 0U) + { + pInA0 = pInA; + + vecMac0 = vector_traits::temp_acc_zero(); + blkCnt = numColsA; + while (blkCnt > 0U) + { + /* + * load {bi,4n+0, bi,4n+1, bi,4n+2, bi,4n+3} + */ + vecInB = inner::vload1<1>(pInB0); /* vldrwq_f32(pInB0, 0); */ + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + + pInB0 = pInB0 + pSrcB.stride(); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + + /* Store the results (1 x 4 block) in the destination buffer */ + inner::vstore1<1>(pOut0, vecMac0); + pOut0 += nb_lanes; + + /* + * rewind + */ + pInB0 -= (pSrcB.stride() * numColsA) - nb_lanes; + k--; + } 
+ + int colBLeft = numColsB & (nb_lanes-1); + if (colBLeft) + { + pInA0 = pInA; + mve_pred16_t p0 = inner::vctpq::mk(colBLeft); + + vecMac0 = vector_traits::temp_acc_zero(); + blkCnt = numColsA; + while (blkCnt > 0U) + { + /* + * load {bi,4n+0, bi,4n+1, bi,4n+2, bi,4n+3} + */ + vecInB = inner::vload1_z<1>(pInB0, colBLeft,p0); + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + + pInB0 = pInB0 + pSrcB.stride(); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* Store the results (1 x colBLeft block) in the destination buffer */ + inner::vstore1_z<1>(pOut0, vecMac0, colBLeft,p0); + } + + /* move to next row */ + pInA += 1 * pSrcA.stride(); + pOut += 1 * pDst.stride(); + i--; + } + + } + +} + +} + + +#undef MATRIX_DIM2 +#undef MATRIX_DIM3 +#undef MATRIX_DIM4 + +#endif + +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/Helium/matrix_multiply_f16.hpp b/dsppp/Include/dsppp/Helium/matrix_multiply_f16.hpp new file mode 100644 index 00000000..3671160f --- /dev/null +++ b/dsppp/Include/dsppp/Helium/matrix_multiply_f16.hpp @@ -0,0 +1,404 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#define ARM_MATH_MVE_FLOAT16 +#endif + +/** \addtogroup HELIUMALG + * @{ + */ + +#if defined(ARM_MATH_MVE_FLOAT16) + +/* + +This can't be used with stride bigger than 21845 +which for embedded is acceptable. + +No check is done at runtime or build time that the stride is not +too big. 
+ +*/ + +template() && + SameElementType::value,bool>::type = true> +__STATIC_INLINE void _arm_mat_mult_2x2_mve( + const MA &pSrcA, + const MB &pSrcB, + RES &&pDst) +{ + using T = typename traits::Scalar; + //using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + + const uint16_t offsetA[8] = { 0, 0, (uint16_t)pSrcA.stride(), (uint16_t)pSrcA.stride(), + 0, 0, (uint16_t)pSrcA.stride(), (uint16_t)pSrcA.stride() }; + /* offsetB allows to read and duplicate 1 row of B */ + const uint16_t offsetB[8] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + + /* {d00, d01, d10, d11} */ + const uint16_t offsetD[8] = { 0, 1, (uint16_t)pDst.stride(), (uint16_t)(pDst.stride()+1), + 0,0,0,0 }; + + uint16x8_t vecOffsA, vecOffsB,vecOffsD; + VEC vecInA, vecInB, vecDst; + T *pOut = pDst.ptr(); /* output data matrix pointer */ + + /* + * load initial offsets + */ + vecOffsA = vldrhq_u16((uint16_t const *) offsetA); + vecOffsB = vldrhq_u16((uint16_t const *) offsetB); + /* + * load {a00 a00 a10 a10 x x x x } + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * load {b00 b01 b00 b01 x x x x } + */ + vecInB = vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + /* + * { a00 b00 a00 b01 + * a10 b00 a10 b01 + * x x + * x x } + */ + vecDst = vmulq(vecInA, vecInB); + /* + * move to 2nd column of matrix A + */ + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) 1); + /* + * load {a01 a01 a11 a11 x x x x} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * move to next B row + */ + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) pSrcB.stride()); + /* + * load {b10, b11, b10, b11, x x x x } + */ + vecInB = vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + /* + * { a00 b00 + a01 b10 a00 b01 + a01 b11 + * a10 b00 + a11 b10 a10 b01 + a11 b11 + * x x + * x x } + */ + vecDst = vfmaq(vecDst, vecInA, vecInB); + + mve_pred16_t p0 = vctp16q(2*2); + /* + * Store the result in the destination buffer + * (lower 
half of the vector) + */ + + vecOffsD = vldrhq_u16((uint16_t const *) offsetD); + + vstrhq_scatter_shifted_offset_p(pOut,vecOffsD,vecDst,p0); + +} + + +template() && + SameElementType::value,bool>::type = true> +__STATIC_INLINE void _arm_mat_mult_3x3_mve( + const MA &pSrcA, + const MB &pSrcB, + RES &&pDst) +{ + const uint16_t offsetA[8] = { 0, 0, 0, + (uint16_t)pSrcA.stride(), (uint16_t)pSrcA.stride(), (uint16_t)pSrcA.stride(), + (uint16_t)(2U*pSrcA.stride()), (uint16_t)(2U*pSrcA.stride()) }; + /* offsetB allows to read and duplicate 1 row of B */ + const uint16_t offsetB[8] = { 0, 1, 2, 0, 1, 2, 0, 1 }; + const uint16_t offsetD[8] = { 0, 1, 2, + (uint16_t)(0+pDst.stride()), (uint16_t)(1+pDst.stride()), + (uint16_t)(2+pDst.stride()), + (uint16_t)(0+2*pDst.stride()), + (uint16_t)(1+2*pDst.stride()) }; + + uint16x8_t vecOffsA, vecOffsB,vecOffsD; + float16x8_t vecInA, vecInB, vecDst; + float16_t *pOut = pDst.ptr(); /* output data matrix pointer */ + + /* + * load initial offsets + */ + vecOffsA = vldrhq_u16((uint16_t const *) offsetA); + vecOffsB = vldrhq_u16((uint16_t const *) offsetB); + + /* + * load {a00 a00 a00 a10 a10 a10 a20 a20} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * load {b00 b01 b02 b00 b01 b02 b00 b01} + */ + vecInB = vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + /* + * { a00 b00 a00 b01 a00 b02 + * a10 b00 a10 b01 a10 b02 + * a20 b00 a20 b01} + */ + vecDst = vmulq(vecInA, vecInB); + + /* + * move to 2nd column of matrix A + */ + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) 1); + /* + * load {a01 a01 a01 a11 a11 a11 a21 a21} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * move to next B row + */ + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) pSrcB.stride()); + /* + * load {b10, b11, b12, b10, b11, b12, b10, b11} + */ + vecInB = vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + /* + * { a00 b00 + a01 b10 a00 b01 + a01 b11 a00 b02 + a01 b12 + * a10 b00 + 
a11 b10 a10 b01 + a11 b11 a10 b02 + a11 b12 + * a20 b00 + a21 b10 a20 b01 + a21 b11 } + */ + vecDst = vfmaq(vecDst, vecInA, vecInB); + /* + * move to 3rd column of matrix A + */ + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) 1); + /* + * load {a02 a02 a02 a12 a12 a12 a22 a22} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * move to next B row + */ + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) pSrcB.stride()); + /* + * load {b20, b21, b22, b20, b21, b22, b20, b21} + */ + vecInB = vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + /* + * {a00 b00 + a01 b10 + a02 b20 a00 b01 + a01 b11 + a02 b21 a00 b02 + a01 b12 + a02 b22}, + * a10 b00 + a11 b10 + a12 b20 a10 b01 + a11 b11 + a12 b21 a10 b02 + a11 b12 + a12 b22}, + * a20 b00 + a21 b10 + a22 b20 a20 b01 + a21 b11 + a22 b21 } + */ + vecDst = vfmaq(vecDst, vecInA, vecInB); + + /* + * Store the result in the destination buffer + */ + vecOffsD = vldrhq_u16((uint16_t const *) offsetD); + + vstrhq_scatter_shifted_offset(pOut,vecOffsD,vecDst); + + pOut += 2*pDst.stride()+2; + + /* last element computed in scalar mode + * a20 b02 + a21 b12 + a22 b22 + */ + + const _Float16 * pA = (const _Float16 *)pSrcA.const_ptr(); + const _Float16 * pB = (const _Float16 *)pSrcB.const_ptr(); + const index_t sa =pSrcA.stride(); + const index_t sb =pSrcB.stride(); + *pOut = pA[2*sa] * pB[2] + pA[1+2*sa] * pB[2+sb] + pA[2+2*sa] * pB[2+2*sb]; + +} + + + +template() && + SameElementType::value,bool>::type = true> +__STATIC_INLINE void _arm_mat_mult_4x4_mve( + const MA &pSrcA, + const MB &pSrcB, + RES &&pDst) +{ + /* offsetA allows to read and duplicate 2 successive column elements of A */ + const uint16_t offsetA[8] = { 0, 0, 0, 0, + (uint16_t)pSrcA.stride(), (uint16_t)pSrcA.stride(), (uint16_t)pSrcA.stride(), (uint16_t)pSrcA.stride() }; + /* offsetB allows to read and duplicate 1 row of B */ + const uint16_t offsetB[8] = { 0, 1, 2, 3, 0, 1, 2, 3 }; + + const uint16_t offsetD[8] = { 0, 1, 2, 3, + 
(uint16_t)(0+pDst.stride()), (uint16_t)(1+pDst.stride()), + (uint16_t)(2+pDst.stride()), (uint16_t)(3+pDst.stride()) }; + + uint16x8_t vecOffsA, vecOffsB,vecOffsD; + float16x8_t vecInA, vecInB, vecDst0, vecDst1; + float16_t *pOut = pDst.ptr(); /* output data matrix pointer */ + + /* + * load initial offsets + */ + vecOffsA = vldrhq_u16((uint16_t const *) offsetA); + vecOffsB = vldrhq_u16((uint16_t const *) offsetB); + + /* + * load {a00 a00 a00 a00 a10 a10 a10 a10} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * load {b00 b01 b02 b03 b00 b01 b02 b03} + */ + vecInB = vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + + /* + * { a00 b00 a00 b01 a00 b02 a00 b03 + * a10 b00 a10 b01 a10 b02 a10 b03 } + */ + vecDst0 = vmulq(vecInA, vecInB); + /* + * jump 2 x A rows (2nd half of matrix) + */ + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) pSrcA.stride()*2); + /* + * load {a20 a20 a20 a20 a30 a30 a30 a30} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * { a20 b00 a20 b01 a20 b02 a20 b03 + * a30 b00 a30 b01 a30 b02 + a31 b12 } + */ + vecDst1 = vmulq(vecInA, vecInB); + /* + * rewind back to top half of the A matrix (2nd column) + */ + vecOffsA = vsubq(vecOffsA, (uint16_t) (2*pSrcA.stride()-1)); + /* + * load {a01 a01 a01 a01 a11 a11 a11 a11} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + + /* + * move to next B row + */ + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) pSrcB.stride()); + /* + * load {b10, b11, b12, b13, b10, b11, b12, b13} + */ + vecInB = vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + /* + * { a00 b00 + a01 b10 a00 b01 + a01 b11 a00 b02 + a01 b12 a00 b03 + a01 b13 + * a10 b00 + a11 b10 a10 b01 + a11 b11 a10 b02 + a11 b12 a10 b03 + a11 b13 } + */ + vecDst0 = vfmaq(vecDst0, vecInA, vecInB); + /* + * jump 2 x A rows (2nd half of matrix) + */ + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) pSrcA.stride()*2); + /* + * load {a21 a21 a21 a21 a31 a31 
a31 a31} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * {a20 b00 + a21 b10 a20 b01 + a21 b11 a20 b02 + a21 b12 a20 b03 + a21 b13 + * a30 b00 + a31 b10 a30 b01 + a31 b11 a30 b02 + a31 b12 a30 b03 + a31 b13 } + */ + vecDst1 = vfmaq(vecDst1, vecInA, vecInB); + + /* + * rewind back to top half of the A matrix (3rd column) + */ + vecOffsA = vsubq(vecOffsA, (uint16_t) (2*pSrcA.stride()-1)); + /* + * load {a02 a02 a02 a02 a12 a12 a12 a12} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * move to next B row + */ + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) pSrcB.stride()); + /* + * load {b20, b21, b22, b23, b20, b21, b22, b23} + */ + vecInB = vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + /* + * { a00 b00 + a01 b10 + a02 b20 a00 b01 + a01 b11 + a02 b21 a00 b02 + a01 b12 + a02 b22 a00 b03 + a01 b13 + a02 b23 + * a10 b00 + a11 b10 + a12 b20 a10 b01 + a11 b11 + a12 b21 a10 b02 + a11 b12 + a12 b22 a10 b03 + a11 b13 + a12 b23 } + */ + vecDst0 = vfmaq(vecDst0, vecInA, vecInB); + /* + * jump 2 x A rows + */ + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) 2*pSrcA.stride()); + + /* + * load {a22 a22 a22 a22 a32 a32 a32 a32} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * {a20 b00 + a21 b10 + a22 b20 a20 b01 + a21 b11 + a22 b21 a20 b02 + a21 b12 + a22 b22 a20 b03 + a21 b13 + a22 b23 + * a30 b00 + a31 b10 + a32 b20 a30 b01 + a31 b11 + a32 b21 a30 b02 + a31 b12 + a32 b22 a30 b03 + a31 b13 + a32 b23 } + */ + vecDst1 = vfmaq(vecDst1, vecInA, vecInB); + + /* + * rewind back to top half of the A matrix (4th column) + */ + vecOffsA = vsubq(vecOffsA, (uint16_t) (2*pSrcA.stride()-1)); + /* + * load {a03 a03 a03 a03 a13 a13 a13 a13} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * move to next B row + */ + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) pSrcB.stride()); + /* + * load {b30, b31, b32, b33, b30, b31, b32, b33} + */ + vecInB = 
vldrhq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + /* + * { a00 b00 +...+ a03 b30, a00 b01 +...+ a03 b31, a00 b02 +...+ a03 b32, a00 b03 +...+ a03 b33 + * a10 b00 +...+ a13 b30, a10 b01 +...+ a13 b31, a10 b02 +...+ a13 b32, a10 b03 +...+ a13 b33 } + */ + vecDst0 = vfmaq(vecDst0, vecInA, vecInB); + /* + * jump 2 x A rows + */ + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) pSrcA.stride()*2); + /* + * load {a23 a23 a23 a23 a33 a33 a33 a33} + */ + vecInA = vldrhq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + /* + * {a20 b00 +...+ a23 b30, a20 b01 +...+ a23 b31, a20 b02 +...+ a23 b32, a20 b03 +...+ a23 b33 + * a30 b00 +...+ a33 b30, a30 b01 +...+ a33 b31, a30 b02 +...+ a33 b32, a30 b03 +...+ a33 b33 } + */ + vecDst1 = vfmaq(vecDst1, vecInA, vecInB); + + /* + * Store the result in the destination buffer + */ + vecOffsD = vldrhq_u16((uint16_t const *) offsetD); + vstrhq_scatter_shifted_offset(pOut,vecOffsD,vecDst0); + pOut += 2*pDst.stride(); + vstrhq_scatter_shifted_offset(pOut,vecOffsD,vecDst1); + +} + +#endif + +/*! 
@} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/Helium/matrix_multiply_f32.hpp b/dsppp/Include/dsppp/Helium/matrix_multiply_f32.hpp new file mode 100644 index 00000000..ecdfbc6c --- /dev/null +++ b/dsppp/Include/dsppp/Helium/matrix_multiply_f32.hpp @@ -0,0 +1,270 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#define ARM_MATH_MVE_FLOAT16 +#endif + +/** \addtogroup HELIUMALG + * @{ + */ + +template() && + SameElementType::value,bool>::type = true> +__STATIC_INLINE void _arm_mat_mult_2x2_mve( + const MA &pSrcA, + const MB &pSrcB, + RES &&pDst) +{ + using T = typename traits::Scalar; + //using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + + /* {a00, a00, a10, a10} */ + const uint32_t offsetA0[4] = { 0, 0, pSrcA.stride(), pSrcA.stride() }; + /* {b00, b01, b00, b01} */ + const uint32_t offsetB0[4] = { 0, 1, 0, 1 }; + /* {a01, a01, a11, a11} */ + const uint32_t offsetA1[4] = { 1, 1, pSrcA.stride() + 1, pSrcA.stride() + 1 }; + /* {b10, b11, b10, b11} */ + const uint32_t offsetB1[4] = { pSrcB.stride(), pSrcB.stride()+1, pSrcB.stride(), pSrcB.stride()+1 }; + + /* {d00, d01, d10, d11} */ + const uint32_t offsetD[4] = { 0, 1, pDst.stride(), pDst.stride()+1 }; + + uint32x4_t vecOffsA, vecOffsB,vecOffsC; + VEC vecInA, vecInB, vecDst; + + if constexpr (!HasStaticStride::value) + { + vecOffsA = vldrwq_u32((uint32_t const *) offsetA0); + } + vecOffsB = vldrwq_u32((uint32_t const *) offsetB0); + + if constexpr (!HasStaticStride::value) + { + vecInA = vldrwq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + } + else + { + constexpr int s = StaticStride::value; + vecInA = inner::vload1_gen_stride<0, 0, s, s>::run(pSrcA.const_ptr()); + } + + if constexpr (!HasStaticStride::value) + { + vecInB = vldrwq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + } + else + { + vecInB = inner::vload1_gen_stride<0, 1, 0, 1>::run(pSrcB.const_ptr()); + } + 
vecDst = inner::vmul(vecInA, vecInB); + + if constexpr (!HasStaticStride::value) + { + vecOffsA = vldrwq_u32((uint32_t const *) offsetA1); + } + + if constexpr (!HasStaticStride::value) + { + vecOffsB = vldrwq_u32((uint32_t const *) offsetB1); + } + + if constexpr (!HasStaticStride::value) + { + vecInA = vldrwq_gather_shifted_offset(pSrcA.const_ptr(), vecOffsA); + } + else + { + constexpr int s = StaticStride::value; + vecInA = inner::vload1_gen_stride<1, 1, s+1, s+1>::run(pSrcA.const_ptr()); + + } + + if constexpr (!HasStaticStride::value) + { + vecInB = vldrwq_gather_shifted_offset(pSrcB.const_ptr(), vecOffsB); + } + else + { + constexpr int s = StaticStride::value; + vecInB = inner::vload1_gen_stride::run(pSrcB.const_ptr()); + } + + if constexpr (!HasStaticStride::value) + { + vecOffsC = vldrwq_u32((uint32_t const *) offsetD); + } + + vecDst = inner::vmacc(vecDst, vecInA, vecInB); + + //inner::vstore1<1>(pDst.ptr(), vecDst); + if constexpr (!HasStaticStride::value) + { + vstrwq_scatter_shifted_offset(pDst.ptr(),vecOffsC,vecDst); + } + else + { + constexpr int s = StaticStride::value; + inner::vstore1_gen_stride<0, 1, s, s+1>::run(pDst.ptr(),vecDst); + } + +} + +template() && + SameElementType::value,bool>::type = true> +__STATIC_INLINE void _arm_mat_mult_3x3_mve( + const MA &pSrcA, + const MB &pSrcB, + RES &&pDst) +{ + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + T *pInB = pSrcB.ptr(); /* input data matrix pointer B */ + T *pInA = pSrcA.ptr(); /* input data matrix pointer A */ + T *pOut = pDst.ptr(); /* output data matrix pointer */ + T *pInA0, *pInA1, *pInA2; + ACC vecMac0, vecMac1, vecMac2; + VEC vecInB; + T const *pSrBVec; + + pSrBVec = (float32_t const *) pInB; + + pInA0 = pInA; + pInA1 = pInA0 + pSrcA.stride(); + pInA2 = pInA1 + pSrcA.stride(); + /* enable predication to disable last (4th) vector element */ + mve_pred16_t p0 = inner::vctpq::mk(MATRIX_DIM3); + + /* 
+ * load {b0,0, b0,1, b0,2, 0} + */ + vecInB = inner::vload1_z<1>(pSrBVec, MATRIX_DIM3,p0); + pSrBVec += pSrcB.stride(); + + vecMac0 = inner::vmul(vecInB, *pInA0++); + vecMac1 = inner::vmul(vecInB, *pInA1++); + vecMac2 = inner::vmul(vecInB, *pInA2++); + /* + * load {b1,0, b1,1, b1,2, 0} + */ + vecInB = inner::vload1_z<1>(pSrBVec, MATRIX_DIM3,p0); + pSrBVec += pSrcB.stride(); + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + vecMac1 = inner::vmacc(vecMac1, vecInB, *pInA1++); + vecMac2 = inner::vmacc(vecMac2, vecInB, *pInA2++); + /* + * load {b2,0, b2,1 , b2,2, 0} + */ + vecInB = inner::vload1_z<1>(pSrBVec, MATRIX_DIM3,p0); + pSrBVec += pSrcB.stride(); + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + vecMac1 = inner::vmacc(vecMac1, vecInB, *pInA1++); + vecMac2 = inner::vmacc(vecMac2, vecInB, *pInA2++); + + /* partial vector stores */ + inner::vstore1_z<1>(pOut, vecMac0, MATRIX_DIM3,p0); + pOut += pDst.stride(); + inner::vstore1_z<1>(pOut, vecMac1, MATRIX_DIM3,p0); + pOut += pDst.stride(); + inner::vstore1_z<1>(pOut, vecMac2, MATRIX_DIM3,p0); + /* + * Return to application + */ +} + +template() && + SameElementType::value,bool>::type = true> +__STATIC_INLINE void _arm_mat_mult_4x4_mve( + const MA &pSrcA, + const MB &pSrcB, + RES &&pDst) +{ + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + T const *pSrBVec; + T *pInB = pSrcB.ptr(); /* input data matrix pointer B */ + T *pInA = pSrcA.ptr(); /* input data matrix pointer A */ + T *pOut = pDst.ptr(); /* output data matrix pointer */ + T *pInA0, *pInA1, *pInA2, *pInA3; + ACC vecMac0, vecMac1, vecMac2, vecMac3; + VEC vecInB; + + pSrBVec = (float32_t const *) pInB; + + pInA0 = pInA; + pInA1 = pInA0 + pSrcA.stride(); + pInA2 = pInA1 + pSrcA.stride(); + pInA3 = pInA2 + pSrcA.stride(); + /* + * load {b0,0, b0,1, b0,2, b0,3} + */ + vecInB = inner::vload1<1>(pSrBVec); + pSrBVec += pSrcB.stride(); + + vecMac0 = 
inner::vmul(vecInB, *pInA0++); + vecMac1 = inner::vmul(vecInB, *pInA1++); + vecMac2 = inner::vmul(vecInB, *pInA2++); + vecMac3 = inner::vmul(vecInB, *pInA3++); + /* + * load {b1,0, b1,1, b1,2, b1,3} + */ + vecInB = inner::vload1<1>(pSrBVec); + pSrBVec += pSrcB.stride(); + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + vecMac1 = inner::vmacc(vecMac1, vecInB, *pInA1++); + vecMac2 = inner::vmacc(vecMac2, vecInB, *pInA2++); + vecMac3 = inner::vmacc(vecMac3, vecInB, *pInA3++); + /* + * load {b2,0, b2,1, b2,2, b2,3} + */ + vecInB = inner::vload1<1>(pSrBVec); + pSrBVec += pSrcB.stride(); + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + vecMac1 = inner::vmacc(vecMac1, vecInB, *pInA1++); + vecMac2 = inner::vmacc(vecMac2, vecInB, *pInA2++); + vecMac3 = inner::vmacc(vecMac3, vecInB, *pInA3++); + /* + * load {b3,0, b3,1, b3,2, b3,3} + */ + vecInB = inner::vload1<1>(pSrBVec); + pSrBVec += pSrcB.stride(); + + vecMac0 = inner::vmacc(vecMac0, vecInB, *pInA0++); + vecMac1 = inner::vmacc(vecMac1, vecInB, *pInA1++); + vecMac2 = inner::vmacc(vecMac2, vecInB, *pInA2++); + vecMac3 = inner::vmacc(vecMac3, vecInB, *pInA3++); + + inner::vstore1<1>(pOut, vecMac0); + pOut += pDst.stride(); + inner::vstore1<1>(pOut, vecMac1); + pOut += pDst.stride(); + inner::vstore1<1>(pOut, vecMac2); + pOut += pDst.stride(); + inner::vstore1<1>(pOut, vecMac3); + +} + +/*! 
@} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/Helium/matrix_multiply_fixed.hpp b/dsppp/Include/dsppp/Helium/matrix_multiply_fixed.hpp new file mode 100644 index 00000000..8169fc0f --- /dev/null +++ b/dsppp/Include/dsppp/Helium/matrix_multiply_fixed.hpp @@ -0,0 +1,613 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#define ARM_MATH_MVE_FLOAT16 +#endif + +/** \addtogroup HELIUMALG + * @{ + */ + +#if defined(ARM_MATH_MVEI) + + + +#define MVE_ASRL_SAT16(acc, shift) ((sqrshrl_sat48(acc, -(32-shift)) >> 32) & 0xffffffff) + + + +template() && + number_traits::Scalar>::is_fixed,bool>::type = true> +inline void arm_mat_mult_2x2_mve( + const MA & pSrcA, + const MB & pSrcB, + RES && pDst) +{ + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + + const T *pInB = pSrcB.const_ptr(); /* input data matrix pointer B */ + const T *pInA = pSrcA.const_ptr(); /* input data matrix pointer A */ + T *pOut = pDst.ptr(); /* output data matrix pointer */ + const T *pInA0 = pInA; + const T *pInA1 = pInA0 + pSrcA.stride(); + ACC acc0, acc1; + VEC vecB, vecA0, vecA1; + mve_pred16_t p0 = inner::vctpq::mk(MATRIX_DIM2); + + + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM2,p0); + } + else + { + vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM2,p0); + } + + + vecA0 = inner::vload1_z<1>(pInA0,MATRIX_DIM2,p0); + vecA1 = inner::vload1_z<1>(pInA1,MATRIX_DIM2,p0); + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = inner::vmacc(vecA1, vecB,p0); + + pOut[0] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + pOut++; + + /* move to next B column */ + pInB = pInB + 1; + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM2,p0); + } + else + { + vecB = 
inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM2,p0); + } + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = inner::vmacc(vecA1, vecB,p0); + + pOut[0] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + +} + + +template() && + number_traits::Scalar>::is_fixed,bool>::type = true> +inline void arm_mat_mult_3x3_mve( + const MA & pSrcA, + const MB & pSrcB, + RES && pDst) +{ + + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + + const T *pInB = pSrcB.const_ptr(); /* input data matrix pointer B */ + const T *pInA = pSrcA.const_ptr(); /* input data matrix pointer A */ + T *pOut = pDst.ptr(); /* output data matrix pointer */ + const T *pInA0 = pInA; + const T *pInA1 = pInA0 + pSrcA.stride(); + const T *pInA2 = pInA1 + pSrcA.stride(); + ACC acc0, acc1, acc2; + VEC vecB, vecA0, vecA1, vecA2; + mve_pred16_t p0 = inner::vctpq::mk(MATRIX_DIM3); + + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM3,p0); + } + else + { + vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM3,p0); + } + + vecA0 = inner::vload1_z<1>(pInA0,MATRIX_DIM3,p0); + vecA1 = inner::vload1_z<1>(pInA1,MATRIX_DIM3,p0); + vecA2 = inner::vload1_z<1>(pInA2,MATRIX_DIM3,p0); + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = inner::vmacc(vecA1, vecB,p0); + acc2 = inner::vmacc(vecA2, vecB,p0); + + pOut[0 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[1 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + pOut[2 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc2)); + pOut++; + + /* move to next B column */ + pInB = pInB + 1; + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM3,p0); + } + else + { + vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM3,p0); + } + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = 
inner::vmacc(vecA1, vecB,p0); + acc2 = inner::vmacc(vecA2, vecB,p0); + + + pOut[0 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[1 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + pOut[2 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc2)); + pOut++; + + /* move to next B column */ + pInB = pInB + 1; + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM3,p0); + } + else + { + vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM3,p0); + } + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = inner::vmacc(vecA1, vecB,p0); + acc2 = inner::vmacc(vecA2, vecB,p0); + + + pOut[0 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[1 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + pOut[2 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc2)); + +} + + +template() && + number_traits::Scalar>::is_fixed,bool>::type = true> +inline void arm_mat_mult_4x4_mve( + const MA & pSrcA, + const MB & pSrcB, + RES && pDst) +{ + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + + const T *pInB = pSrcB.const_ptr(); /* input data matrix pointer B */ + const T *pInA = pSrcA.const_ptr(); /* input data matrix pointer A */ + T *pOut = pDst.ptr(); /* output data matrix pointer */ + const T *pInA0 = pInA; + const T *pInA1 = pInA0 + pSrcA.stride(); + const T *pInA2 = pInA1 + pSrcA.stride(); + const T *pInA3 = pInA2 + pSrcA.stride(); + ACC acc0, acc1, acc2, acc3; + VEC vecB, vecA0, vecA1, vecA2, vecA3; + mve_pred16_t p0 = inner::vctpq::mk(MATRIX_DIM4); + + + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM4,p0); + } + else + { + vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM4,p0); + } + + vecA0 = inner::vload1_z<1>(pInA0,MATRIX_DIM4,p0); + vecA1 = inner::vload1_z<1>(pInA1,MATRIX_DIM4,p0); + vecA2 = 
inner::vload1_z<1>(pInA2,MATRIX_DIM4,p0); + vecA3 = inner::vload1_z<1>(pInA3,MATRIX_DIM4,p0); + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = inner::vmacc(vecA1, vecB,p0); + acc2 = inner::vmacc(vecA2, vecB,p0); + acc3 = inner::vmacc(vecA3, vecB,p0); + + + pOut[0 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[1 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + pOut[2 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc2)); + pOut[3 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc3)); + pOut++; + + /* move to next B column */ + pInB = pInB + 1; + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM4,p0); + } + else + { + vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM4,p0); + } + + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = inner::vmacc(vecA1, vecB,p0); + acc2 = inner::vmacc(vecA2, vecB,p0); + acc3 = inner::vmacc(vecA3, vecB,p0); + + pOut[0 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[1 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + pOut[2 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc2)); + pOut[3 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc3)); + + pOut++; + + /* move to next B column */ + pInB = pInB + 1; + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM4,p0); + } + else + { + vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM4,p0); + } + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = inner::vmacc(vecA1, vecB,p0); + acc2 = inner::vmacc(vecA2, vecB,p0); + acc3 = inner::vmacc(vecA3, vecB,p0); + + pOut[0 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[1 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + pOut[2 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc2)); + pOut[3 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc3)); + + pOut++; + + /* move to next B 
column */ + pInB = pInB + 1; + + if constexpr (HasStaticStride::value) + { + vecB = inner::vload1_z::value>(pInB,MATRIX_DIM4,p0); + } + else + { + vecB = inner::vload1_z(pInB,pSrcB.stride(),MATRIX_DIM4,p0); + } + + acc0 = inner::vmacc(vecA0, vecB,p0); + acc1 = inner::vmacc(vecA1, vecB,p0); + acc2 = inner::vmacc(vecA2, vecB,p0); + acc3 = inner::vmacc(vecA3, vecB,p0); + + + pOut[0 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc0)); + pOut[1 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc1)); + pOut[2 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc2)); + pOut[3 * pDst.stride()] = inner::from_accumulator(inner::vreduce(acc3)); + +} + + + +template() && + number_traits::Scalar>::is_fixed,bool>::type = true> + __STATIC_INLINE void _dot_m_m(const MA&pSrcA,const MB&pSrcB, + RES &&pDst, + const TMP &BT, + const Helium* = nullptr) + { + using T = typename traits::Scalar; + using ACC = typename vector_traits::temp_accumulator; + using VEC = typename vector_traits::vector; + constexpr int nb_lanes = vector_traits::nb_lanes; + + const T *pInA = pSrcA.const_ptr(); /* input data matrix pointer A */ + const T *pInB = pSrcB.const_ptr(); + T *pOut = pDst.ptr(); /* input data matrix pointer B */ + T *px; /* Temporary output data matrix pointer */ + T *px2; /* Temporary output data matrix pointer */ + uint32_t numRowsA = pSrcA.rows(); /* number of rows of input matrix A */ + + uint32_t numColsB = pSrcB.columns(); /* number of columns of input matrix B */ + uint32_t numColsA = pSrcA.columns(); /* number of columns of input matrix A */ + + uint32_t strideA = pSrcA.stride(); /* number of columns of input matrix A */ + + uint32_t numRowsB = pSrcB.rows(); /* number of rows of input matrix A */ + uint32_t col, i = 0u, j, row = numRowsB; /* loop counters */ + + const T *pInA2; + const T *pInB2; + uint32_t blkCnt; /* loop counters */ + + + + { + /* small squared matrix specialized routines */ + if (numRowsA == numColsB && numColsB == numColsA) { + + 
if (numRowsA == 1) { + pDst(0,0) = pSrcA(0,0) * pSrcB(0,0); + return; + } else if (numRowsA == 2) + return arm_mat_mult_2x2_mve(pSrcA, pSrcB, std::forward(pDst)); + else if (numRowsA == 3) + return arm_mat_mult_3x3_mve(pSrcA, pSrcB, std::forward(pDst)); + else if (numRowsA == 4) + return arm_mat_mult_4x4_mve(pSrcA, pSrcB, std::forward(pDst)); + } + + /* + * Matrix transpose + */ + + const T *pSrcBT = BT.const_ptr(); + + + /* + * Reset the variables for the usage in the following multiplication process + */ + i = 0; + row = numRowsA >> 1; + px = pOut; + px2 = px + pDst.stride(); + + /* + * The following loop performs the dot-product of each row in pSrcA with each column in pSrcB + */ + + /* + * row loop + */ + while (row > 0u) { + /* + * For every row wise process, the column loop counter is to be initiated + */ + col = numColsB >> 1; + /* + * For every row wise process, the pIn2 pointer is set + * to the starting address of the transposed pSrcB data + */ + pInB = pSrcBT; + pInB2 = pInB + numRowsB; + j = 0; + + /* + * column loop + */ + while (col > 0u) { + T const *pSrcAVec, *pSrcBVec, *pSrcA2Vec, *pSrcB2Vec; + VEC vecA, vecA2, vecB, vecB2; + ACC acc0, acc1, acc2, acc3; + + /* + * Initiate the pointer pIn1 to point to the starting address of the column being processed + */ + pInA = pSrcA.const_ptr() + i; + pInA2 = pInA + strideA; + pInB = pSrcBT + j; + pInB2 = pInB + numRowsB; + + + pSrcAVec = (T const *) pInA; + pSrcA2Vec = (T const *) pInA2; + pSrcBVec = (T const *) pInB; + pSrcB2Vec = (T const *) pInB2; + + acc0 = vector_traits::temp_acc_zero(); + acc1 = vector_traits::temp_acc_zero(); + acc2 = vector_traits::temp_acc_zero(); + acc3 = vector_traits::temp_acc_zero(); + + vecA = inner::vload1<1>(pSrcAVec); + pSrcAVec += nb_lanes; + + blkCnt = numColsA / nb_lanes; + while (blkCnt > 0U) { + vecB = inner::vload1<1>(pSrcBVec); + pSrcBVec += nb_lanes; + acc0 = inner::vmacc(acc0, vecA, vecB); + vecA2 = inner::vload1<1>(pSrcA2Vec); + pSrcA2Vec += nb_lanes; + acc1 = 
inner::vmacc(acc1, vecA2, vecB); + vecB2 = inner::vload1<1>(pSrcB2Vec); + pSrcB2Vec += nb_lanes; + acc2 = inner::vmacc(acc2, vecA, vecB2); + vecA = inner::vload1<1>(pSrcAVec); + pSrcAVec += nb_lanes; + acc3 = inner::vmacc(acc3, vecA2, vecB2); + + blkCnt--; + } + /* + * tail + */ + blkCnt = numColsA & (nb_lanes-1); + if (blkCnt > 0U) { + mve_pred16_t p0 = inner::vctpq::mk(blkCnt); + vecB = inner::vload1<1>(pSrcBVec); + acc0 = inner::vmacc(acc0, vecA, vecB, p0); + vecA2 = inner::vload1<1>(pSrcA2Vec); + acc1 = inner::vmacc(acc1, vecA2, vecB, p0); + vecB2 = inner::vload1<1>(pSrcB2Vec); + acc2 = inner::vmacc(acc2, vecA, vecB2, p0); + vecA = inner::vload1<1>(pSrcAVec); + acc3 = inner::vmacc(acc3, vecA2, vecB2, p0); + } + + *px++ = inner::from_accumulator(inner::vreduce(acc0)); + *px++ = inner::from_accumulator(inner::vreduce(acc2)); + *px2++ = inner::from_accumulator(inner::vreduce(acc1)); + *px2++ = inner::from_accumulator(inner::vreduce(acc3)); + + j += numRowsB * 2; + /* + * Decrement the column loop counter + */ + col--; + + } + + i = i + strideA * 2; + px = px2 + (numColsB & 1u); + px2 = px + pDst.stride(); + /* + * Decrement the row loop counter + */ + row--; + } + + /* + * Compute remaining row and/or column below + */ + + if (numColsB & 1u) { + row = numRowsA & (~0x1); //avoid redundant computation + px = pOut + + pDst.stride() - 1; + i = 0; + + /* + * row loop + */ + while (row > 0) { + + + T const *pSrcAVec, *pSrcBVec; + VEC vecA, vecB; + ACC acc0; + + /* + * point to last column in matrix B + */ + pInB = pSrcBT + numRowsB * (numColsB - 1); + pInA = pSrcA.const_ptr() + i; + + pSrcAVec = (T const *) pInA; + pSrcBVec = (T const *) pInB; + + acc0 = vector_traits::temp_acc_zero(); + blkCnt = (numColsA) / nb_lanes; + while (blkCnt > 0U) { + vecA = inner::vload1<1>(pSrcAVec); + pSrcAVec += nb_lanes; + vecB = inner::vload1<1>(pSrcBVec); + pSrcBVec += nb_lanes; + acc0 = inner::vmacc(acc0, vecA, vecB); + + blkCnt--; + } + /* + * tail + */ + blkCnt = (numColsA & 
(nb_lanes-1)); + if (blkCnt > 0U) { + mve_pred16_t p0 = inner::vctpq::mk(blkCnt); + vecA = inner::vload1<1>(pSrcAVec); + vecB = inner::vload1<1>(pSrcBVec); + acc0 = inner::vmacc(acc0, vecA, vecB, p0); + } + + *px = inner::from_accumulator(inner::vreduce(acc0)); + + px += pDst.stride(); + + i += strideA; + /* + * Decrement the row loop counter + */ + row--; + } + } + + if (numRowsA & 1u) { + col = numColsB; + i = 0u; + /* + * point to last row in output matrix + */ + px = pOut + pDst.stride() * (numRowsA - 1); + /* + * col loop + */ + while (col > 0) { + + T const *pSrcAVec, *pSrcBVec; + VEC vecA, vecB; + ACC acc0; + + /* + * point to last row in matrix A + */ + pInA = pSrcA.const_ptr() + (numRowsA - 1) * strideA; + pInB = pSrcBT + i; + + /* + * Set the variable sum, that acts as accumulator, to zero + */ + pSrcAVec = (T const *) pInA; + pSrcBVec = (T const *) pInB; + acc0 = vector_traits::temp_acc_zero(); + + blkCnt = ((numColsA) / nb_lanes); + while (blkCnt > 0U) { + vecA = inner::vload1<1>(pSrcAVec); + pSrcAVec += nb_lanes; + vecB = inner::vload1<1>(pSrcBVec); + pSrcBVec += nb_lanes; + acc0 = inner::vmacc(acc0, vecA, vecB); + + blkCnt--; + } + /* + * tail + */ + blkCnt = (numColsA & 7); + if (blkCnt > 0U) { + mve_pred16_t p0 = inner::vctpq::mk(blkCnt); + vecA = inner::vload1<1>(pSrcAVec); + vecB = inner::vload1<1>(pSrcBVec); + acc0 = inner::vmacc(acc0, vecA, vecB, p0); + } + + *px++ = inner::from_accumulator(inner::vreduce(acc0)); + + i += numColsA; + + /* + * Decrement the col loop counter + */ + col--; + } + } + + } + +} + +#endif + +/*! 
@} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/Helium/num_features.hpp b/dsppp/Include/dsppp/Helium/num_features.hpp new file mode 100644 index 00000000..1f3b34d5 --- /dev/null +++ b/dsppp/Include/dsppp/Helium/num_features.hpp @@ -0,0 +1,17 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/* + +vreduce is going from vector accumulator to scalar accumulator +from_accumulator is going from scalar accumulator to scalar datatype + + +*/ + +#include "float.hpp" +#include "half.hpp" +#include "q31.hpp" +#include "q15.hpp" +#include "q7.hpp" diff --git a/dsppp/Include/dsppp/Helium/q15.hpp b/dsppp/Include/dsppp/Helium/q15.hpp new file mode 100644 index 00000000..472f85df --- /dev/null +++ b/dsppp/Include/dsppp/Helium/q15.hpp @@ -0,0 +1,542 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#define ARM_MATH_MVE_FLOAT16 +#endif + +/** \addtogroup HeliumNumber Helium specific number definitions + * \ingroup NUMBER + * @{ + * \addtogroup HeliumQ15Number Q15 + * \ingroup HeliumNumber + * @{ + */ + + +/****************** + * + * Helium + * + */ +#if defined(ARM_MATH_MVEI) + +/** + * @brief Vector features for Q15 on Helium + * + * @tparam arch Current architecture + */ +template +struct vector_traits::value>::type > +{ + //! Scalar datatype + typedef Q15 type; + + //! Storage datatype (int16_t) + typedef type::value_type storage_type; + + //! Vector datatype + typedef int16x8_t vector; + + //! Temp accumulator datatype + typedef Q<33,30> temp_accumulator; + + //! Predicate for loop + typedef mve_pred16_t predicate_t; + + //! Has vector instructions + static constexpr bool has_vector = true; + + //! Is not float + static constexpr bool is_float = false; + + //! Is fixed point + static constexpr bool is_fixed = true; + + //! Has predicated loop + static constexpr bool has_predicate = true; + + //! 
Number of lanes + static constexpr int nb_lanes = 8; + + + /** + * @brief Zero + * + * @return Zero with accumulator datatype + */ + static Q<33,30> temp_acc_zero() + { + return(Q<33,30>()); + } + + /** + * @brief Value to write in a lane to write 0 + * + * @return Zero value + */ + static constexpr int16_t zero_lane() {return 0;}; + + /** + * @brief Convert to lane value + * + * @param[in] x Lane value + * + * @return Lane value + */ + static constexpr int16_t lane_value(const Q15 x) {return x.v;}; + +}; + +/** + * \ingroup HeliumNumber + */ +namespace inner { + + + template<> + struct vctpq{ + static mve_pred16_t mk(uint32_t v) + { + return(vctp16q(v)); + }; + }; + + __STATIC_FORCEINLINE int16x8_t vconst(Q15 val) + { + return(vdupq_n_s16(val.v)); + } + + __STATIC_FORCEINLINE int16x8_t vconst_tail(Q15 val, + const mve_pred16_t p0) + { + return(vdupq_x_n_s16(val.v,p0)); + } + + + __STATIC_FORCEINLINE int16x8_t vneg(const int16x8_t a) + { + return(vqnegq(a)); + }; + + __STATIC_FORCEINLINE int16x8_t vneg(const int16x8_t a, + const mve_pred16_t p0) + { + return(vqnegq_m(vuninitializedq_s16(),a,p0)); + }; + + __STATIC_FORCEINLINE int16x8_t vadd(const int16x8_t a,const int16x8_t b) + { + return(vqaddq(a,b)); + }; + + __STATIC_FORCEINLINE int16x8_t vadd(const int16x8_t a,const Q15 b) + { + return(vqaddq_n_s16(a,b.v)); + }; + + __STATIC_FORCEINLINE int16x8_t vadd(const Q15 a,const int16x8_t b) + { + return(vqaddq_n_s16(b,a.v)); + }; + + + __STATIC_FORCEINLINE int16x8_t vadd(const int16x8_t a,const int16x8_t b, + const mve_pred16_t p0) + { + return(vqaddq_m(vuninitializedq_s16(),a,b,p0)); + }; + + __STATIC_FORCEINLINE int16x8_t vadd(const int16x8_t a,const Q15 b, + const mve_pred16_t p0) + { + return(vqaddq_m_n_s16(vuninitializedq_s16(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int16x8_t vadd(const Q15 a,const int16x8_t b, + const mve_pred16_t p0) + { + return(vqaddq_m_n_s16(vuninitializedq_s16(),b,a.v,p0)); + }; + + __STATIC_FORCEINLINE int16x8_t vsub(const int16x8_t 
a,const int16x8_t b) + { + return(vqsubq(a,b)); + }; + + __STATIC_FORCEINLINE int16x8_t vsub(const int16x8_t a,const Q15 b) + { + return(vqsubq_n_s16(a,b.v)); + }; + + __STATIC_FORCEINLINE int16x8_t vsub(const Q15 a,const int16x8_t b) + { + return(vqsubq_n_s16(b,a.v)); + }; + + __STATIC_FORCEINLINE int16x8_t vsub(const int16x8_t a,const int16x8_t b, + const mve_pred16_t p0) + { + return(vqsubq_m(vuninitializedq_s16(),a,b,p0)); + }; + + __STATIC_FORCEINLINE int16x8_t vsub(const int16x8_t a,const Q15 b, + const mve_pred16_t p0) + { + return(vqsubq_m_n_s16(vuninitializedq_s16(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int16x8_t vsub(const Q15 a,const int16x8_t b, + const mve_pred16_t p0) + { + return(vqsubq_m_n_s16(vuninitializedq_s16(),b,a.v,p0)); + }; + + __STATIC_FORCEINLINE int16x8_t vmul(const int16x8_t a,const int16x8_t b) + { + return(vqdmulhq(a,b)); + }; + + + __STATIC_FORCEINLINE int16x8_t vmul(const int16x8_t a,const Q15 b) + { + return(vqdmulhq_n_s16(a,b.v)); + }; + + __STATIC_FORCEINLINE int16x8_t vmul(const Q15 a,const int16x8_t b) + { + return(vqdmulhq_n_s16(b,a.v)); + }; + + __STATIC_FORCEINLINE int16x8_t vmul(const int16x8_t a,const int16x8_t b, + const mve_pred16_t p0) + { + return(vqdmulhq_m(vuninitializedq_s16(),a,b,p0)); + }; + + + __STATIC_FORCEINLINE int16x8_t vmul(const int16x8_t a,const Q15 b, + const mve_pred16_t p0) + { + return(vqdmulhq_m_n_s16(vuninitializedq_s16(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int16x8_t vmul(const Q15 a,const int16x8_t b, + const mve_pred16_t p0) + { + return(vqdmulhq_m_n_s16(vuninitializedq_s16(),b,a.v,p0)); + }; + + template::type = true> + inline int16x8_t vload1(const Q15 *p) + { + return(vld1q(reinterpret_cast(p))); + }; + + /* + + 7*S must be <= 65535 so + S <= 9362 + + */ + + /** + * @brief Vector load with stride + * + * @param[in] p Load address + * + * @tparam S Stride + * @tparam Stride check + * + * @return Gather load + * + * In q15, a lane is on 16 bits. 
So the offset that can be encoded + * for gather load cannot be bigger than 65535. + * With a stride of S, the bigger offset is S*7. + * So S must be <= 65535/7 + * S <= 9362 + * + * For higher stride, the Helium instruction cannot be used and instead + * a dynamic stride is used. + */ + template1) && (S<=9362),bool>::type = true> + inline int16x8_t vload1(const Q15 *p) + { + constexpr uint16x8_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S}; + //uint16x8_t offset = vidupq_u16((uint16_t)0,1); + //offset = vmulq_n_u16(offset,S); + return(vldrhq_gather_shifted_offset_s16(reinterpret_cast(p),offset)); + }; + + template9362),bool>::type = true> + inline int16x8_t vload1(const Q15 *p) + { + int16x8_t res; + for(std::size_t i=0;i<8;i++) + { + res[i] = p->v; + p += S; + } + + return(res); + }; + + // Dynamic stride + inline int16x8_t vload1(const Q15 *p,index_t stride) + { + if (stride <= 9362) + { + uint16x8_t offset = vidupq_u16((uint32_t)0,1); + offset = vmulq_n_u16(offset,stride); + return(vldrhq_gather_shifted_offset_s16(reinterpret_cast(p),offset)); + } + else + { + int16x8_t res; + for(std::size_t i=0;i<8;i++) + { + res[i] = p->v; + p += stride; + } + return(res); + } + } + + template::type = true> + inline int16x8_t vload1_z(const Q15 *p,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + return(vld1q_z(reinterpret_cast(p),p0)); + + }; + + template1) && (S<=9362),bool>::type = true> + inline int16x8_t vload1_z(const Q15 *p,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + uint16x8_t offset = vidupq_u16((uint32_t)0,1); + offset = vmulq_n_u16(offset,S); + return(vldrhq_gather_shifted_offset_z_s16(reinterpret_cast(p),offset,p0)); + }; + + template9362),bool>::type = true> + inline int16x8_t vload1_z(const Q15 *p,std::size_t nb,mve_pred16_t p0) + { + (void)p0; + int16x8_t res; + std::size_t i=0; + for(;iv; + p += S; + } + + for(;i<8;i++) + { + res[i] = 0; + p += S; + } + + return(res); + + }; + + // Dynamic stride + inline int16x8_t vload1_z(const Q15 *p,index_t 
stride,std::size_t nb,mve_pred16_t p0) + { + + if (stride <= 9362) + { + uint16x8_t offset = vidupq_u16((uint32_t)0,1); + offset = vmulq_n_u16(offset,stride); + return(vldrhq_gather_shifted_offset_z_s16(reinterpret_cast(p),offset,p0)); + } + else + { + int16x8_t res; + std::size_t i=0; + for(;iv; + p += stride; + } + + for(;i<8;i++) + { + res[i] = 0; + p += stride; + } + return(res); + } + }; + + + template::type = true> + inline void vstore1(Q15 *p,const int16x8_t val) + { + vst1q(reinterpret_cast(p),val); + }; + + template1) && (S<=9362),bool>::type = true> + inline void vstore1(Q15 *p,const int16x8_t val) + { + //uint16x8_t offset={0,1,2,3,4,5,6,7}; + //uint16x8_t offset = vidupq_u16((uint16_t)0,1); + //offset = vmulq_n_u16(offset,S); + constexpr uint16x8_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S}; + return(vstrhq_scatter_shifted_offset_s16(reinterpret_cast(p),offset,val)); + }; + + template9362),bool>::type = true> + inline void vstore1(Q15 *p,const int16x8_t val) + { + for(std::size_t i=0;i<8;i++) + { + *p = Q15(val[i]); + p += S; + } + + }; + + // dynamic stride + inline void vstore1(Q15 *p,const index_t stride,const int16x8_t val) + { + if (stride <=9362) + { + uint16x8_t offset = vidupq_u16((uint32_t)0,1); + offset = vmulq_n_u16(offset,stride); + return(vstrhq_scatter_shifted_offset_s16(reinterpret_cast(p),offset,val)); + } + else + { + for(std::size_t i=0;i<8;i++) + { + *p = Q15(val[i]); + p += stride; + } + } + } + + template::type = true> + inline void vstore1_z(Q15 *p,const int16x8_t val,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + return(vstrhq_p(reinterpret_cast(p),val,p0)); + }; + + template1) && (S<=9362),bool>::type = true> + inline void vstore1_z(Q15 *p,const int16x8_t val,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + //uint16x8_t offset={0,1,2,3,4,5,6,7}; + //uint16x8_t offset = vidupq_u16((uint16_t)0,1); + //offset = vmulq_n_u16(offset,S); + constexpr uint16x8_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S}; + 
return(vstrhq_scatter_shifted_offset_p_s16(reinterpret_cast(p),offset,val,p0)); + }; + + + template9362),bool>::type = true> + inline void vstore1_z(Q15 *p,const int16x8_t val,std::size_t nb,mve_pred16_t p0) + { + (void)p0; + for(std::size_t i=0;i(p),offset,val,p0)); + } + else + { + for(std::size_t i=0;i vmacc(const Q<33,30> sum, + const int16x8_t vala, + const int16x8_t valb) + { + return(Q<33,30>(vmlaldavaq(sum.v,vala,valb))); + }; + + __STATIC_FORCEINLINE Q<33,30> vmacc(const Q<33,30> sum, + const int16x8_t vala, + const int16x8_t valb, + const mve_pred16_t p0) + { + return(Q<33,30>(vmlaldavaq_p(sum.v,vala,valb,p0))); + }; + + __STATIC_FORCEINLINE Q<33,30> vmacc(const int16x8_t vala, + const int16x8_t valb) + { + return(Q<33,30>(vmlaldavq(vala,valb))); + }; + + __STATIC_FORCEINLINE Q<33,30> vmacc(const int16x8_t vala, + const int16x8_t valb, + const mve_pred16_t p0) + { + return(Q<33,30>(vmlaldavq_p(vala,valb,p0))); + }; + + /** + * @brief Reduce accumulation value + * + * @param[in] sum The sum + * + * @return Reduced value + * + * Since the Helium instructions can accumulate vector product into a scalar + * there is no need to reduce the accumulator value. It is already in scalar + * form. + */ + __STATIC_FORCEINLINE Q<33,30> vreduce(const Q<33,30> sum) + { + return(sum); + }; + +}; +#endif + +/*! @} */ +/*! 
@} */ diff --git a/dsppp/Include/dsppp/Helium/q31.hpp b/dsppp/Include/dsppp/Helium/q31.hpp new file mode 100644 index 00000000..711e8a47 --- /dev/null +++ b/dsppp/Include/dsppp/Helium/q31.hpp @@ -0,0 +1,349 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#define ARM_MATH_MVE_FLOAT16 +#endif + +/** \addtogroup HeliumNumber Helium specific number definitions + * \ingroup NUMBER + * @{ + * \addtogroup HeliumQ31Number Q31 + * \ingroup HeliumNumber + * @{ + */ + + +/****************** + * + * Helium + * + */ +#if defined(ARM_MATH_MVEI) + + + +template +struct vector_traits::value>::type > +{ + typedef Q31 type; + typedef type::value_type storage_type; + typedef int32x4_t vector; + typedef Q<9,54> temp_accumulator; + typedef mve_pred16_t predicate_t; + + static constexpr bool has_vector = true; + static constexpr bool is_float = false; + static constexpr bool is_fixed = true; + static constexpr bool has_predicate = true; + + static constexpr int nb_lanes = 4; + + + static Q<9,54> temp_acc_zero() + { + return(Q<9,54>()); + } + + static constexpr int16_t zero_lane() {return 0;}; + + static constexpr int16_t lane_value(const Q31 x) {return x.v;}; + +}; + +/** + * Inner implementation of Helium intrinsics + * \ingroup HeliumNumber + */ +namespace inner { + + template<> + struct vctpq{ + static mve_pred16_t mk(uint32_t v) + { + return(vctp32q(v)); + }; + }; + + __STATIC_FORCEINLINE int32x4_t vconst(Q31 val) + { + return(vdupq_n_s32(val.v)); + } + + __STATIC_FORCEINLINE int32x4_t vconst_tail(Q31 val, + const mve_pred16_t p0) + { + return(vdupq_x_n_s32(val.v,p0)); + } + + + __STATIC_FORCEINLINE int32x4_t vneg(const int32x4_t a) + { + return(vqnegq(a)); + }; + + __STATIC_FORCEINLINE int32x4_t vneg(const int32x4_t a, + const mve_pred16_t p0) + { + return(vqnegq_m(vuninitializedq_s32(),a,p0)); + }; + + __STATIC_FORCEINLINE int32x4_t vadd(const int32x4_t a,const int32x4_t b) + { + return(vqaddq(a,b)); + }; + + 
__STATIC_FORCEINLINE int32x4_t vadd(const int32x4_t a,const Q31 b) + { + return(vqaddq_n_s32(a,b.v)); + }; + + __STATIC_FORCEINLINE int32x4_t vadd(const Q31 a,const int32x4_t b) + { + return(vqaddq_n_s32(b,a.v)); + }; + + + __STATIC_FORCEINLINE int32x4_t vadd(const int32x4_t a,const int32x4_t b, + const mve_pred16_t p0) + { + return(vqaddq_m(vuninitializedq_s32(),a,b,p0)); + }; + + __STATIC_FORCEINLINE int32x4_t vadd(const int32x4_t a,const Q31 b, + const mve_pred16_t p0) + { + return(vqaddq_m_n_s32(vuninitializedq_s32(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int32x4_t vadd(const Q31 a,const int32x4_t b, + const mve_pred16_t p0) + { + return(vqaddq_m_n_s32(vuninitializedq_s32(),b,a.v,p0)); + }; + + __STATIC_FORCEINLINE int32x4_t vsub(const int32x4_t a,const int32x4_t b) + { + return(vqsubq(a,b)); + }; + + __STATIC_FORCEINLINE int32x4_t vsub(const int32x4_t a,const Q31 b) + { + return(vqsubq_n_s32(a,b.v)); + }; + + __STATIC_FORCEINLINE int32x4_t vsub(const Q31 a,const int32x4_t b) + { + return(vqsubq_n_s32(b,a.v)); + }; + + __STATIC_FORCEINLINE int32x4_t vsub(const int32x4_t a,const int32x4_t b, + const mve_pred16_t p0) + { + return(vqsubq_m(vuninitializedq_s32(),a,b,p0)); + }; + + __STATIC_FORCEINLINE int32x4_t vsub(const int32x4_t a,const Q31 b, + const mve_pred16_t p0) + { + return(vqsubq_m_n_s32(vuninitializedq_s32(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int32x4_t vsub(const Q31 a,const int32x4_t b, + const mve_pred16_t p0) + { + return(vqsubq_m_n_s32(vuninitializedq_s32(),b,a.v,p0)); + }; + + __STATIC_FORCEINLINE int32x4_t vmul(const int32x4_t a,const int32x4_t b) + { + return(vqdmulhq(a,b)); + }; + + + __STATIC_FORCEINLINE int32x4_t vmul(const int32x4_t a,const Q31 b) + { + return(vqdmulhq_n_s32(a,b.v)); + }; + + __STATIC_FORCEINLINE int32x4_t vmul(const Q31 a,const int32x4_t b) + { + return(vqdmulhq_n_s32(b,a.v)); + }; + + __STATIC_FORCEINLINE int32x4_t vmul(const int32x4_t a,const int32x4_t b, + const mve_pred16_t p0) + { + 
return(vqdmulhq_m(vuninitializedq_s32(),a,b,p0)); + }; + + + __STATIC_FORCEINLINE int32x4_t vmul(const int32x4_t a,const Q31 b, + const mve_pred16_t p0) + { + return(vqdmulhq_m_n_s32(vuninitializedq_s32(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int32x4_t vmul(const Q31 a,const int32x4_t b, + const mve_pred16_t p0) + { + return(vqdmulhq_m_n_s32(vuninitializedq_s32(),b,a.v,p0)); + }; + + template::type = true> + inline int32x4_t vload1(const Q31 *p) + { + return(vld1q(reinterpret_cast(p))); + }; + + template1),bool>::type = true> + inline int32x4_t vload1(const Q31 *p) + { + constexpr uint32x4_t offset={0*S,1*S,2*S,3*S}; + return(vldrwq_gather_shifted_offset_s32(reinterpret_cast(p),offset)); + }; + + + // Dynamic stride + inline int32x4_t vload1(const Q31 *p,index_t stride) + { + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,stride); + return(vldrwq_gather_shifted_offset_s32(reinterpret_cast(p),offset)); + + } + + template::type = true> + inline int32x4_t vload1_z(const Q31 *p,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + return(vld1q_z(reinterpret_cast(p),p0)); + + }; + + template1),bool>::type = true> + inline int32x4_t vload1_z(const Q31 *p,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,S); + return(vldrwq_gather_shifted_offset_z_s32(reinterpret_cast(p),offset,p0)); + }; + + + + // Dynamic stride + inline int32x4_t vload1_z(const Q31 *p,index_t stride,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,stride); + return(vldrwq_gather_shifted_offset_z_s32(reinterpret_cast(p),offset,p0)); + + }; + + + template::type = true> + inline void vstore1(Q31 *p,const int32x4_t val) + { + vst1q(reinterpret_cast(p),val); + }; + + template1) ,bool>::type = true> + inline void vstore1(Q31 *p,const int32x4_t val) + { + + constexpr uint32x4_t offset={0*S,1*S,2*S,3*S}; + 
return(vstrwq_scatter_shifted_offset_s32(reinterpret_cast(p),offset,val)); + }; + + + + // dynamic stride + inline void vstore1(Q31 *p,const index_t stride,const int32x4_t val) + { + + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,stride); + return(vstrwq_scatter_shifted_offset_s32(reinterpret_cast(p),offset,val)); + } + + template::type = true> + inline void vstore1_z(Q31 *p,const int32x4_t val,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + return(vstrwq_p(reinterpret_cast(p),val,p0)); + }; + + template1),bool>::type = true> + inline void vstore1_z(Q31 *p,const int32x4_t val,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + + constexpr uint32x4_t offset={0*S,1*S,2*S,3*S}; + vstrwq_scatter_shifted_offset_p_s32(reinterpret_cast(p),offset,val,p0); + }; + + + + // dynamic stride + inline void vstore1_z(Q31 *p,const index_t stride,const int32x4_t val,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + uint32x4_t offset = vidupq_u32((uint32_t)0,1); + offset = vmulq_n_u32(offset,stride); + vstrwq_scatter_shifted_offset_p_s32(reinterpret_cast(p),offset,val,p0); + + }; + + __STATIC_FORCEINLINE Q<9,54> vmacc(const Q<9,54> sum, + const int32x4_t vala, + const int32x4_t valb) + { + return(Q<9,54>(vrmlaldavhaq(sum.v,vala,valb))); + }; + + __STATIC_FORCEINLINE Q<9,54> vmacc(const Q<9,54> sum, + const int32x4_t vala, + const int32x4_t valb, + const mve_pred16_t p0) + { + return(Q<9,54>(vrmlaldavhaq_p(sum.v,vala,valb,p0))); + }; + + __STATIC_FORCEINLINE Q<9,54> vmacc(const int32x4_t vala, + const int32x4_t valb) + { + return(Q<9,54>(vrmlaldavhq(vala,valb))); + }; + + __STATIC_FORCEINLINE Q<9,54> vmacc(const int32x4_t vala, + const int32x4_t valb, + const mve_pred16_t p0) + { + return(Q<9,54>(vrmlaldavhq_p(vala,valb,p0))); + }; + + __STATIC_FORCEINLINE Q<15,48> vreduce(const Q<9,54> sum) + { + return(Q<15,48>(asrl(sum.v, 6))); + }; + +}; + +#endif + +/*! @} */ +/*! 
@} */ diff --git a/dsppp/Include/dsppp/Helium/q7.hpp b/dsppp/Include/dsppp/Helium/q7.hpp new file mode 100644 index 00000000..022d8517 --- /dev/null +++ b/dsppp/Include/dsppp/Helium/q7.hpp @@ -0,0 +1,467 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_MATH_MVEI +#define ARM_MATH_MVEF +#define ARM_MATH_MVE_FLOAT16 +#endif + +/** \addtogroup HeliumNumber Helium specific number definitions + * \ingroup NUMBER + * @{ + * \addtogroup HeliumQ7Number Q7 + * \ingroup HeliumNumber + * @{ + */ + + +/****************** + * + * Helium + * + */ +#if defined(ARM_MATH_MVEI) + + +template +struct vector_traits::value>::type > +{ + typedef Q7 type; + typedef type::value_type storage_type; + typedef int8x16_t vector; + typedef Q<17,14> temp_accumulator; + typedef mve_pred16_t predicate_t; + + static constexpr bool has_vector = true; + static constexpr bool is_float = false; + static constexpr bool is_fixed = true; + static constexpr bool has_predicate = true; + + static constexpr int nb_lanes = 16; + + + static Q<17,14> temp_acc_zero() + { + return(Q<17,14>()); + } + + static constexpr int8_t zero_lane() {return 0;}; + + static constexpr int8_t lane_value(const Q7 x) {return x.v;}; + +}; + +/** + * Inner implementation of Helium intrinsics + * \ingroup HeliumNumber + */ +namespace inner { + + + template<> + struct vctpq{ + static mve_pred16_t mk(uint32_t v) + { + return(vctp8q(v)); + }; + }; + + __STATIC_FORCEINLINE int8x16_t vconst(Q7 val) + { + return(vdupq_n_s8(val.v)); + } + + __STATIC_FORCEINLINE int8x16_t vconst_tail(Q7 val, + const mve_pred16_t p0) + { + return(vdupq_x_n_s8(val.v,p0)); + } + + + __STATIC_FORCEINLINE int8x16_t vneg(const int8x16_t a) + { + return(vqnegq(a)); + }; + + __STATIC_FORCEINLINE int8x16_t vneg(const int8x16_t a, + const mve_pred16_t p0) + { + return(vqnegq_m(vuninitializedq_s8(),a,p0)); + }; + + __STATIC_FORCEINLINE int8x16_t vadd(const int8x16_t a,const int8x16_t b) + { + return(vqaddq(a,b)); + }; + + 
__STATIC_FORCEINLINE int8x16_t vadd(const int8x16_t a,const Q7 b) + { + return(vqaddq_n_s8(a,b.v)); + }; + + __STATIC_FORCEINLINE int8x16_t vadd(const Q7 a,const int8x16_t b) + { + return(vqaddq_n_s8(b,a.v)); + }; + + + __STATIC_FORCEINLINE int8x16_t vadd(const int8x16_t a,const int8x16_t b, + const mve_pred16_t p0) + { + return(vqaddq_m(vuninitializedq_s8(),a,b,p0)); + }; + + __STATIC_FORCEINLINE int8x16_t vadd(const int8x16_t a,const Q7 b, + const mve_pred16_t p0) + { + return(vqaddq_m_n_s8(vuninitializedq_s8(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int8x16_t vadd(const Q7 a,const int8x16_t b, + const mve_pred16_t p0) + { + return(vqaddq_m_n_s8(vuninitializedq_s8(),b,a.v,p0)); + }; + + __STATIC_FORCEINLINE int8x16_t vsub(const int8x16_t a,const int8x16_t b) + { + return(vqsubq(a,b)); + }; + + __STATIC_FORCEINLINE int8x16_t vsub(const int8x16_t a,const Q7 b) + { + return(vqsubq_n_s8(a,b.v)); + }; + + __STATIC_FORCEINLINE int8x16_t vsub(const Q7 a,const int8x16_t b) + { + return(vqsubq_n_s8(b,a.v)); + }; + + __STATIC_FORCEINLINE int8x16_t vsub(const int8x16_t a,const int8x16_t b, + const mve_pred16_t p0) + { + return(vqsubq_m(vuninitializedq_s8(),a,b,p0)); + }; + + __STATIC_FORCEINLINE int8x16_t vsub(const int8x16_t a,const Q7 b, + const mve_pred16_t p0) + { + return(vqsubq_m_n_s8(vuninitializedq_s8(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int8x16_t vsub(const Q7 a,const int8x16_t b, + const mve_pred16_t p0) + { + return(vqsubq_m_n_s8(vuninitializedq_s8(),b,a.v,p0)); + }; + + __STATIC_FORCEINLINE int8x16_t vmul(const int8x16_t a,const int8x16_t b) + { + return(vqdmulhq(a,b)); + }; + + + __STATIC_FORCEINLINE int8x16_t vmul(const int8x16_t a,const Q7 b) + { + return(vqdmulhq_n_s8(a,b.v)); + }; + + __STATIC_FORCEINLINE int8x16_t vmul(const Q7 a,const int8x16_t b) + { + return(vqdmulhq_n_s8(b,a.v)); + }; + + __STATIC_FORCEINLINE int8x16_t vmul(const int8x16_t a,const int8x16_t b, + const mve_pred16_t p0) + { + return(vqdmulhq_m(vuninitializedq_s8(),a,b,p0)); + }; 
+ + + __STATIC_FORCEINLINE int8x16_t vmul(const int8x16_t a,const Q7 b, + const mve_pred16_t p0) + { + return(vqdmulhq_m_n_s8(vuninitializedq_s8(),a,b.v,p0)); + }; + + __STATIC_FORCEINLINE int8x16_t vmul(const Q7 a,const int8x16_t b, + const mve_pred16_t p0) + { + return(vqdmulhq_m_n_s8(vuninitializedq_s8(),b,a.v,p0)); + }; + + template::type = true> + inline int8x16_t vload1(const Q7 *p) + { + return(vld1q(reinterpret_cast(p))); + }; + + /* + + 15*S <= 255 => S <= 17 + + */ + template1) && (S<=17),bool>::type = true> + inline int8x16_t vload1(const Q7 *p) + { + constexpr uint8x16_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S, + 8*S,9*S,10*S,11*S,12*S,13*S,14*S,15*S}; + //uint8x16_t offset = vidupq_u8((uint16_t)0,1); + //offset = vmulq_n_u8(offset,S); + return(vldrbq_gather_offset_s8(reinterpret_cast(p),offset)); + }; + + template17),bool>::type = true> + inline int8x16_t vload1(const Q7 *p) + { + int8x16_t res; + for(std::size_t i=0;i<16;i++) + { + res[i] = p->v; + p += S; + } + + return(res); + }; + + // Dynamic stride + inline int8x16_t vload1(const Q7 *p,index_t stride) + { + if (stride <= 17) + { + uint8x16_t offset = vidupq_u8((uint32_t)0,1); + offset = vmulq_n_u8(offset,stride); + return(vldrbq_gather_offset_s8(reinterpret_cast(p),offset)); + } + else + { + int8x16_t res; + for(std::size_t i=0;i<16;i++) + { + res[i] = p->v; + p += stride; + } + return(res); + } + } + + template::type = true> + inline int8x16_t vload1_z(const Q7 *p,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + return(vld1q_z(reinterpret_cast(p),p0)); + + }; + + template1) && (S<=17),bool>::type = true> + inline int8x16_t vload1_z(const Q7 *p,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + uint8x16_t offset = vidupq_u8((uint32_t)0,1); + offset = vmulq_n_u8(offset,S); + return(vldrbq_gather_offset_z_s8(reinterpret_cast(p),offset,p0)); + }; + + template17),bool>::type = true> + inline int8x16_t vload1_z(const Q7 *p,std::size_t nb,mve_pred16_t p0) + { + (void)p0; + int8x16_t res; + std::size_t 
i=0; + for(;iv; + p += S; + } + + for(;i<16;i++) + { + res[i] = 0; + p += S; + } + + return(res); + + }; + + // Dynamic stride + inline int8x16_t vload1_z(const Q7 *p,index_t stride,std::size_t nb,mve_pred16_t p0) + { + + if (stride <= 17) + { + uint8x16_t offset = vidupq_u8((uint32_t)0,1); + offset = vmulq_n_u8(offset,stride); + return(vldrbq_gather_offset_z_s8(reinterpret_cast(p),offset,p0)); + } + else + { + int8x16_t res; + std::size_t i=0; + for(;iv; + p += stride; + } + + for(;i<16;i++) + { + res[i] = 0; + p += stride; + } + return(res); + } + }; + + + template::type = true> + inline void vstore1(Q7 *p,const int8x16_t val) + { + vst1q(reinterpret_cast(p),val); + }; + + template1) && (S<=17),bool>::type = true> + inline void vstore1(Q7 *p,const int8x16_t val) + { + //uint8x16_t offset={0,1,2,3,4,5,6,7}; + //uint8x16_t offset = vidupq_u8((uint16_t)0,1); + //offset = vmulq_n_u8(offset,S); + constexpr uint8x16_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S, + 8*S,9*S,10*S,11*S,12*S,13*S,14*S,15*S}; + return(vstrbq_scatter_offset_s8(reinterpret_cast(p),offset,val)); + }; + + template17),bool>::type = true> + inline void vstore1(Q7 *p,const int8x16_t val) + { + for(std::size_t i=0;i<16;i++) + { + *p = Q7(val[i]); + p += S; + } + + }; + + // dynamic stride + inline void vstore1(Q7 *p,const index_t stride,const int8x16_t val) + { + if (stride <=17) + { + uint8x16_t offset = vidupq_u8((uint32_t)0,1); + offset = vmulq_n_u8(offset,stride); + return(vstrbq_scatter_offset_s8(reinterpret_cast(p),offset,val)); + } + else + { + for(std::size_t i=0;i<16;i++) + { + *p = Q7(val[i]); + p += stride; + } + } + } + + template::type = true> + inline void vstore1_z(Q7 *p,const int8x16_t val,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + return(vstrbq_p(reinterpret_cast(p),val,p0)); + }; + + template1) && (S<=17),bool>::type = true> + inline void vstore1_z(Q7 *p,const int8x16_t val,std::size_t nb,mve_pred16_t p0) + { + (void)nb; + //uint8x16_t offset={0,1,2,3,4,5,6,7}; + //uint8x16_t 
offset = vidupq_u8((uint16_t)0,1); + //offset = vmulq_n_u8(offset,S); + constexpr uint8x16_t offset={0*S,1*S,2*S,3*S,4*S,5*S,6*S,7*S, + 8*S,9*S,10*S,11*S,12*S,13*S,14*S,15*S}; + return(vstrbq_scatter_offset_p_s8(reinterpret_cast(p),offset,val,p0)); + }; + + + template17),bool>::type = true> + inline void vstore1_z(Q7 *p,const int8x16_t val,std::size_t nb,mve_pred16_t p0) + { + (void)p0; + for(std::size_t i=0;i(p),offset,val,p0)); + } + else + { + for(std::size_t i=0;i vmacc(const Q<17,14> sum, + const int8x16_t vala, + const int8x16_t valb) + { + return(Q<17,14>(vmladavaq(sum.v,vala,valb))); + }; + + __STATIC_FORCEINLINE Q<17,14> vmacc(const Q<17,14> sum, + const int8x16_t vala, + const int8x16_t valb, + const mve_pred16_t p0) + { + return(Q<17,14>(vmladavaq_p(sum.v,vala,valb,p0))); + }; + + __STATIC_FORCEINLINE Q<17,14> vmacc(const int8x16_t vala, + const int8x16_t valb) + { + return(Q<17,14>(vmladavq(vala,valb))); + }; + + __STATIC_FORCEINLINE Q<17,14> vmacc(const int8x16_t vala, + const int8x16_t valb, + const mve_pred16_t p0) + { + return(Q<17,14>(vmladavq_p(vala,valb,p0))); + }; + + __STATIC_FORCEINLINE Q<17,14> vreduce(const Q<17,14> sum) + { + return(sum); + }; + +}; +#endif + +/*! @} */ +/*! 
@} */ diff --git a/dsppp/Include/dsppp/Neon/basic.hpp b/dsppp/Include/dsppp/Neon/basic.hpp new file mode 100644 index 00000000..828c9648 --- /dev/null +++ b/dsppp/Include/dsppp/Neon/basic.hpp @@ -0,0 +1,133 @@ +// -*- C++ -*- + +#pragma once + +#include +#include +#include +#if 0 + +template +void _Add(const T* pSrcA, + const T* pSrcB, + T* pDst, + const std::size_t l, + const Neon* = nullptr, + typename std::enable_if::is_float && + vector_traits::has_vector,T>::type* = nullptr) +{ + using num = vector_traits; + using VecType = typename num::vector; + constexpr int nb_lanes = num::nb_lanes; + constexpr int lanes_shift = shiftFromValue(nb_lanes); + constexpr int lanes_mask = maskFromShift(lanes_shift); + + //std::cout << "Neon float\r\n" ; + + uint32_t blkCnt; /* Loop counter */ + + VecType vec1; + VecType vec2; + VecType res; + + /* Compute several lanes at a time */ + blkCnt = l >> lanes_shift; + + while (blkCnt > 0U) + { + /* C = A + B */ + + /* Add and then store the results in the destination buffer. */ + vec1 = vld1q(pSrcA); + vec2 = vld1q(pSrcB); + res = vaddq(vec1, vec2); + vst1q(pDst, res); + + /* Increment pointers */ + pSrcA += nb_lanes; + pSrcB += nb_lanes; + pDst += nb_lanes; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Tail */ + blkCnt = l & lanes_mask; + + while (blkCnt > 0U) + { + /* C = A + B */ + + /* Add and store result in destination buffer. 
*/ + *pDst++ = (*pSrcA++) + (*pSrcB++); + + /* Decrement loop counter */ + blkCnt--; + } + + +}; + + + + +template +void _Add(const T* pSrcA_Q, + const T* pSrcB_Q, + T* pDst_Q, + const std::size_t l, + const Neon* = nullptr, + typename std::enable_if::is_fixed && + vector_traits::has_vector,T>::type* = nullptr) +{ + using num = vector_traits; + using VecType = typename num::vector; + using value_type = typename T::value_type; + constexpr int nb_lanes = num::nb_lanes; + constexpr int lanes_shift = shiftFromValue(nb_lanes); + constexpr int lanes_mask = maskFromShift(lanes_shift); + const value_type *pSrcA = reinterpret_cast(pSrcA_Q); + const value_type *pSrcB = reinterpret_cast(pSrcB_Q); + value_type *pDst = reinterpret_cast(pDst_Q); + + uint32_t blkCnt; /* loop counters */ + VecType vecA; + VecType vecB; + + /* Compute 8 outputs at a time */ + blkCnt = l >> lanes_shift; + while (blkCnt > 0U) + { + /* + * C = A + B + * Add and then store the results in the destination buffer. + */ + vecA = vld1q(pSrcA); + vecB = vld1q(pSrcB); + vst1q(pDst, vqaddq(vecA, vecB)); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + /* + * advance vector source and destination pointers + */ + pSrcA += nb_lanes; + pSrcB += nb_lanes; + pDst += nb_lanes; + } + /* + * tail + */ + blkCnt = l & lanes_mask; + if (blkCnt > 0U) + { + mve_pred16_t p0 = num::vctpq(blkCnt); + vecA = vld1q(pSrcA); + vecB = vld1q(pSrcB); + vstrq_p(pDst, vqaddq(vecA, vecB), p0); + } +} + +#endif \ No newline at end of file diff --git a/dsppp/Include/dsppp/Neon/float.hpp b/dsppp/Include/dsppp/Neon/float.hpp new file mode 100644 index 00000000..0dc95759 --- /dev/null +++ b/dsppp/Include/dsppp/Neon/float.hpp @@ -0,0 +1,105 @@ +// -*- C++ -*- + +#pragma once + +/****************** + * + * Neon + * + */ +#if defined(ARM_MATH_NEON) + +template +struct vector_traits::value>::type> +{ + typedef float type; + typedef float storage_type; + typedef float32x4_t vector; + static constexpr bool has_vector = true; + 
static constexpr bool is_float = true; + static constexpr bool is_fixed = false; + + static constexpr int nb_lanes = 4; + + static constexpr float zero_lane() {return 0.0f;}; + + +}; + +namespace inner { + + + + __STATIC_FORCEINLINE float32x4_t vadd(const float32x4_t a,const float32x4_t b) + { + return(vaddq_f32(a,b)); + }; + + __STATIC_FORCEINLINE float32x4_t vmul(const float32x4_t a,const float32x4_t b) + { + return(vmulqq_f32(a,b)); + }; + + __STATIC_FORCEINLINE float32x4_t vconst(const float v) + { + const float32x4_t t = vdupq_n_f32(v) + return(t); + } + + template::type = true> + inline float32x4_t vload1(const float32_t *p) + { + return(vld1q(p)); + }; + + template1),bool>::type = true> + inline float32x4_t vload1(const float32_t *p) + { + float32x4_t res; + res[0] = *p; + p += S; + + res[1] = *p; + p += S; + + res[2] = *p; + p += S; + + res[3] = *p; + p += S; + + return(res); + }; + + template::type = true> + inline void vstore1(float32_t *p,const float32x4_t val) + { + return(vst1q(p,val)); + }; + + template1),bool>::type = true> + inline void vstore1(float32_t *p,const float32x4_t val) + { + *p = val[0]; + p += S; + + *p = val[1]; + p += S; + + *p = val[2]; + p += S; + + *p = val[3]; + p += S; + }; + + + +}; + +#endif diff --git a/dsppp/Include/dsppp/Neon/num_features.hpp b/dsppp/Include/dsppp/Neon/num_features.hpp new file mode 100644 index 00000000..142d4607 --- /dev/null +++ b/dsppp/Include/dsppp/Neon/num_features.hpp @@ -0,0 +1,5 @@ +// -*- C++ -*- + +#pragma once + +#include "float.hpp" diff --git a/dsppp/Include/dsppp/Scalar/basic.hpp b/dsppp/Include/dsppp/Scalar/basic.hpp new file mode 100644 index 00000000..1b10de60 --- /dev/null +++ b/dsppp/Include/dsppp/Scalar/basic.hpp @@ -0,0 +1,255 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup ARCHALG Architecture specific algorithm + * \ingroup DSPPP + * \addtogroup SCALARALG Scalar algorithm + * \ingroup ARCHALG + * @{ + */ + + +#define SCALAR_UNROLL 2 + +/** + * @brief Fill evaluator 
for scalar architecture + * + * @param v Destination vector + * @param[in] val Initialization value + * @param[in] l Length of vector + * + * @tparam T Scalar datatype + * @tparam DST VEctor / Matrix datatype + * @tparam Test to restrict to vector addressing + * and compatible datatype + * + */ +template::value && + SameElementType::value,bool>::type = true> +inline void _Fill(DST &v, + const T val, + vector_length_t l, + const Scalar* = nullptr) +{ + constexpr unsigned int U = SCALAR_UNROLL; + index_t i; + + UNROLL_LOOP + for(i=0 ; i <= l-(1< Check DST has matrix indexing only + */ +template() && + SameElementType::value,bool>::type = true> +inline void _Fill2D(DST &v, + const T val, + const vector_length_t rows, + const vector_length_t cols, + const Scalar* = nullptr) +{ + constexpr unsigned int U = SCALAR_UNROLL; + index_t row=0; + + + for(; row <= rows-(1< Check vectors are compatible + */ +template(),bool>::type = true> +inline void eval(DA &v, + const DB& other, + const vector_length_t l, + const Scalar* = nullptr) +{ + constexpr unsigned int U = SCALAR_UNROLL; + index_t i=0; + + for(i=0 ; i <= l-(1< Check only support matrix indexing + */ +template(),bool>::type = true> +inline void eval2D(DA &v, + const DB& other, + const vector_length_t rows, + const vector_length_t cols, + const Scalar* = nullptr) +{ + constexpr unsigned int U = SCALAR_UNROLL; + index_t row=0; + + + for(; row <= rows-(1< Check vector expressions are compatible + * + * @return Dot product result + */ +template(),bool>::type = true> +inline DotResult _dot(const DA& a, + const DB& b, + const vector_length_t l, + const Scalar* = nullptr) +{ + using Acc = DotResult; + constexpr unsigned int U = SCALAR_UNROLL; + index_t i; + + Acc acc = Acc{}; + + for(i=0 ; i <= l-(1< Check vectors are compatible + */ +template(),bool>::type = true> +inline void _swap(DA&& a, + DB&& b, + const vector_length_t l, + const Scalar* = nullptr) +{ + for(index_t i=0;i +__STATIC_INLINE void _arm_mat_trans( + const MA 
&src, + MB &dst, + const Scalar* = nullptr) +{ + DISABLE_LOOP_UNROLL + for(index_t r=0;r < dst.rows() ; r++) + { + dst.row(r) = copy(src.col(r)); + } +} + +/** + * @brief Matrix times vector for scalar architecture + * + * @param res Destination + * @param[in] m Matrix + * @param[in] v Vector (my be expression) + * + * @tparam M Matrix datatype + * @tparam V Vector datatype + * @tparam RES Result datatype + */ +template +inline void _dot_m_v(RES &res, + const M&m,const V&v, + const Scalar* = nullptr) +{ + using T = typename traits::Scalar; + using Acc = typename number_traits::accumulator; + uint32_t numRows = m.rows(); + uint32_t numCols = m.columns(); + const T *pSrcA = m.ptr(); + const T *pInA1; /* input data matrix pointer A of Q31 type */ + const T *pInA2; /* input data matrix pointer A of Q31 type */ + const T *pInA3; /* input data matrix pointer A of Q31 type */ + const T *pInA4; /* input data matrix pointer A of Q31 type */ + T *px; /* Temporary output data matrix pointer */ + uint32_t i; + uint16_t row, colCnt; /* loop counters */ + T matData, matData2, vecData, vecData2; + + + /* Process 4 rows at a time */ + row = numRows >> 2; + i = 0u; + px = res.ptr(); + + /* The following loop performs the dot-product of each row in pSrcA with the vector */ + /* row loop */ + while (row > 0) { + /* Initialize accumulators */ + Acc sum1 = Acc{}; + Acc sum2 = Acc{}; + Acc sum3 = Acc{}; + Acc sum4 = Acc{}; + + + /* Loop unrolling: process 2 columns per iteration */ + //colCnt = numCols; + + /* Initialize pointers to the starting address of the column being processed */ + pInA1 = pSrcA + i; + pInA2 = pInA1 + m.stride(); + pInA3 = pInA2 + m.stride(); + pInA4 = pInA3 + m.stride(); + + + // Main loop: matrix-vector multiplication + for(colCnt = 0 ; colCnt < numCols; colCnt ++) + { + // Read 2 values from vector + vecData = v[colCnt]; + // Read 8 values from the matrix - 2 values from each of 4 rows, and do multiply accumulate + matData = *(pInA1)++; + sum1 = 
inner::mac(sum1, matData, vecData); + matData = *(pInA2)++; + sum2 = inner::mac(sum2, matData, vecData); + matData = *(pInA3)++; + sum3 = inner::mac(sum3, matData, vecData); + matData = *(pInA4)++; + sum4 = inner::mac(sum4, matData, vecData); + } + + /* Saturate and store the result in the destination buffer */ + *px++ = inner::from_accumulator(sum1); + *px++ = inner::from_accumulator(sum2); + *px++ = inner::from_accumulator(sum3); + *px++ = inner::from_accumulator(sum4); + + i = i + m.stride() * 4; + + /* Decrement the row loop counter */ + row--; + } + + /* process any remaining rows */ + row = numRows & 3u; + while (row > 0) { + + Acc sum = Acc{}; + pInA1 = pSrcA + i; + + int32_t k=0; + for(k=0; k <= (int)numCols-2; k += 2) + { + vecData = v[k]; + vecData2 = v[k+1]; + matData = *(pInA1)++; + matData2 = *(pInA1)++; + sum = inner::mac(sum, matData, vecData); + sum = inner::mac(sum, matData2, vecData2); + } + // process remainder of row + + + for(; k < (int)numCols; k ++) + { + sum = inner::mac(sum ,*pInA1++, v[k]); + } + + *px++ = inner::from_accumulator(sum); + i = i + m.stride(); + row--; + } +} + +#include "matrix_multiply_fixed.hpp" +#include "matrix_multiply_float.hpp" + +/*! 
@} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/Scalar/matrix_multiply_fixed.hpp b/dsppp/Include/dsppp/Scalar/matrix_multiply_fixed.hpp new file mode 100644 index 00000000..1f5bab39 --- /dev/null +++ b/dsppp/Include/dsppp/Scalar/matrix_multiply_fixed.hpp @@ -0,0 +1,138 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup SCALARALG + * @{ + */ + +/** + * @brief Matrix times matrix for scalar architecture and fixed point + * + * @param[in] pSrcA The source a + * @param[in] pSrcB The source b + * @param pDst The destination + * @param[in] BT Temporary matrix for transposition + * + * @tparam MA Left hand side datatype + * @tparam MB Right hand side datatype + * @tparam RES Destination datatype + * @tparam TMP Temporary matrix datatype + * @tparam Check fixed point arithmetic used + */ +template::Scalar>::is_fixed,bool>::type = true> +__STATIC_INLINE void _dot_m_m(const MA&pSrcA,const MB&pSrcB, + RES &&pDst, + const TMP &BT, + const Scalar* = nullptr) +{ + using T = typename traits::Scalar; + using Acc = typename number_traits::accumulator; + + T *pIn1 = pSrcA.ptr(); /* Input data matrix pointer A */ + T *pIn2 = pSrcB.ptr(); /* Input data matrix pointer B */ + T *pInA = pSrcA.ptr(); /* Input data matrix pointer A */ + T *pInB = pSrcB.ptr(); /* Input data matrix pointer B */ + T *pOut = pDst.ptr(); /* Output data matrix pointer */ + T *px; /* Temporary output data matrix pointer */ + Acc sum; /* Accumulator */ + uint16_t numRowsA = pSrcA.rows(); /* Number of rows of input matrix A */ + uint16_t numColsB = pSrcB.columns(); /* Number of columns of input matrix B */ + uint16_t numColsA = pSrcA.columns(); /* Number of columns of input matrix A */ + uint32_t col, i = 0U, row = numRowsA, colCnt; /* Loop counters */ + + (void)BT; + /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */ + /* row loop */ + do + { + /* Output pointer is set to starting address of row being processed */ + px = pOut + i; + + 
/* For every row wise process, column loop counter is to be initiated */ + col = numColsB; + + /* For every row wise process, pIn2 pointer is set to starting address of pSrcB data */ + pIn2 = pSrcB.ptr(); + + /* column loop */ + do + { + /* Set the variable sum, that acts as accumulator, to zero */ + sum = Acc{}; + + /* Initialize pointer pIn1 to point to starting address of column being processed */ + pIn1 = pInA; + + + /* Loop unrolling: Compute 4 MACs at a time. */ + colCnt = numColsA >> 2U; + + /* matrix multiplication */ + while (colCnt > 0U) + { + /* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... + a(m,p) * b(p,n) */ + + /* Perform the multiply-accumulates */ + sum = inner::mac(sum, *pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + sum = inner::mac(sum,*pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + sum = inner::mac(sum, *pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + sum = inner::mac(sum, *pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + /* Decrement loop counter */ + colCnt--; + } + + /* Loop unrolling: Compute remaining MACs */ + colCnt = numColsA % 0x4U; + + + while (colCnt > 0U) + { + /* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... + a(m,p) * b(p,n) */ + + /* Perform the multiply-accumulates */ + sum = inner::mac(sum ,*pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + /* Decrement loop counter */ + colCnt--; + } + + /* Convert result from 2.62 to 1.31 format and store in destination buffer */ + *px++ = inner::from_accumulator(sum); + + /* Decrement column loop counter */ + col--; + + /* Update pointer pIn2 to point to starting address of next column */ + pIn2 = pInB + (numColsB - col) ; + + } while (col > 0U); + + /* Update pointer pInA to point to starting address of next row */ + i = i + pDst.stride(); + pInA = pInA + pSrcA.stride(); + + /* Decrement row loop counter */ + row--; + + } while (row > 0U); + + +} + + +/*! 
@} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/Scalar/matrix_multiply_float.hpp b/dsppp/Include/dsppp/Scalar/matrix_multiply_float.hpp new file mode 100644 index 00000000..5da4cb42 --- /dev/null +++ b/dsppp/Include/dsppp/Scalar/matrix_multiply_float.hpp @@ -0,0 +1,131 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup SCALARALG + * @{ + */ + +/** + * @brief Matrix times matrix for scalar architecture and float + * + * @param[in] pSrcA The source a + * @param[in] pSrcB The source b + * @param pDst The destination + * + * @tparam MA Left hand side datatype + * @tparam MB Right hand side datatype + * @tparam RES Result datatype + * @tparam Check if float + */ +template::Scalar>::is_float,bool>::type = true> +__STATIC_INLINE void _dot_m_m(const MA&pSrcA,const MB&pSrcB, + RES &&pDst, + const Scalar* = nullptr) +{ + using T = typename traits::Scalar; + using Acc = typename number_traits::accumulator; + //using Comp = typename number_traits::compute_type; + T *pIn1 = pSrcA.ptr(); /* Input data matrix pointer A */ + T *pIn2 = pSrcB.ptr(); /* Input data matrix pointer B */ + T *pInA = pSrcA.ptr(); /* Input data matrix pointer A */ + T *pInB = pSrcB.ptr(); /* Input data matrix pointer B */ + T *pOut = pDst.ptr(); /* Output data matrix pointer */ + T *px; /* Temporary output data matrix pointer */ + Acc sum; /* Accumulator */ + uint16_t numRowsA = pSrcA.rows(); /* Number of rows of input matrix A */ + uint16_t numColsB = pSrcB.columns(); /* Number of columns of input matrix B */ + uint16_t numColsA = pSrcA.columns(); /* Number of columns of input matrix A */ + uint32_t col, i = 0U, row = numRowsA, colCnt; /* Loop counters */ + + + /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */ + /* row loop */ + do + { + /* Output pointer is set to starting address of row being processed */ + px = pOut + i; + + /* For every row wise process, column loop counter is to be initiated */ + col = numColsB; + + /* 
For every row wise process, pIn2 pointer is set to starting address of pSrcB data */ + pIn2 = pSrcB.ptr(); + + /* column loop */ + do + { + /* Set the variable sum, that acts as accumulator, to zero */ + sum = Acc{}; + + /* Initialize pointer pIn1 to point to starting address of column being processed */ + pIn1 = pInA; + + + /* Loop unrolling: Compute 4 MACs at a time. */ + colCnt = numColsA >> 2U; + + /* matrix multiplication */ + while (colCnt > 0U) + { + /* c(m,p) = a(m,1) * b(1,p) + a(m,2) * b(2,p) + .... + a(m,n) * b(n,p) */ + + /* Perform the multiply-accumulates */ + sum = inner::mac(sum, *pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + sum = inner::mac(sum, *pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + sum = inner::mac(sum, *pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + sum = inner::mac(sum, *pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + /* Decrement loop counter */ + colCnt--; + } + + /* Loop unrolling: Compute remaining MACs */ + colCnt = numColsA % 0x4U; + + while (colCnt > 0U) + { + /* c(m,p) = a(m,1) * b(1,p) + a(m,2) * b(2,p) + .... + a(m,n) * b(n,p) */ + + /* Perform the multiply-accumulates */ + sum = inner::mac(sum, *pIn1++, *pIn2); + pIn2 += pSrcB.stride(); + + /* Decrement loop counter */ + colCnt--; + } + + /* Store result in destination buffer */ + *px++ = inner::from_accumulator(sum); + + /* Decrement column loop counter */ + col--; + + /* Update pointer pIn2 to point to starting address of next column */ + pIn2 = pInB + (numColsB - col); + + } while (col > 0U); + + /* Update pointer pInA to point to starting address of next row */ + i = i + pDst.stride(); + pInA = pInA + pSrcA.stride(); + + /* Decrement row loop counter */ + row--; + + } while (row > 0U); + + +} + +/*! 
@} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/algorithms.hpp b/dsppp/Include/dsppp/algorithms.hpp new file mode 100644 index 00000000..641e3326 --- /dev/null +++ b/dsppp/Include/dsppp/algorithms.hpp @@ -0,0 +1,313 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \defgroup DSPPP C++ extension + * C++ template extension to CMSIS-DSP. It is not yet part of + * the pack but the headers can be found on the + * [CMSIS-DSP github](https://github.com/ARM-software/CMSIS-DSP/dsppp/Include) + * The principles are described in this @ref dsppp_main "page" + */ + + +/** +In this file we have kernels that are written in an +architecture independant way (using operators of the library) + +*/ + +namespace arm_cmsis_dsp { + +/** \addtogroup ALGO Architecture independent algorithms + * \ingroup DSPPP + * Algorithms written in an architecture independent way + */ + +/* + +Matrix transpose + +*/ + + + + /** @ingroup ALGO + * @brief Transpose a matrix. + * + * @tparam MA Any matrix type + * @tparam MB Any matrix type + * @param dst Destination matrix. + * @param src Source matrix. 
+ * + */ +template::value && + HasMatrixIndexing::value /*&& + SameElementType::value*/,bool>::type = true> +inline void transposeTo(MA &dst, + const MB& src) +{ + _arm_mat_trans(src,dst,CURRENT_ARCH); +} + + + +/* + +Init a diagonal matrix (0 outside of diagonal) + +*/ +template typename A, + typename VB, +typename std::enable_if::value && + SameElementType::value,bool>::type = true> +inline void _diagonal(Matrix &v, + const VB& other, + const vector_length_t rows) +{ + UNROLL_LOOP + for(index_t r=0;r < rows ; r++) + { + v.row(r) = P{}; + v(r,r) = other[r]; + } +} + + +/* + + +Fill diagonal of an existing matrix + +*/ +template typename A, + typename VB, +typename std::enable_if::value && + SameElementType::value,bool>::type = true> +inline void _fill_diagonal(Matrix &v, + const VB& other, + const vector_length_t rows) +{ + for(index_t r=0;r < rows ; r++) + { + v(r,r) = other[r]; + } +} + +template typename A> +inline void _identity(Matrix &v, + const vector_length_t rows) +{ + UNROLL_LOOP + for(index_t r=0;r < rows ; r++) + { + v.row(r) = P{}; + v(r,r) = number_traits

::one(); + } +} + + +/** + * @ingroup ALGO + * @brief Matrix x Vector product. + * + * @tparam M Any matrix type + * @tparam V Any vector type + * @param m matrix. + * @param v vector. + * @return The matrix x vector product + * + */ +template::value,bool>::type = true> +inline typename OutputVector::type dot(const M&m,const V&v) +{ + typename OutputVector::type res; + _dot_m_v(res,m,v,CURRENT_ARCH); + return(res); +} + +template::value,bool>::type = true> +inline typename OutputVector::type dot(const M&m,const V&v) +{ + typename OutputVector::type res(m.rows()); + _dot_m_v(res,m,v,CURRENT_ARCH); + return(res); +} + +template::value,bool>::type = true> +inline void dot(RES && res,const M&m,const V&v) +{ + //typename OutputVector::type res(m.rows()); + _dot_m_v(res,m,v,CURRENT_ARCH); +} + + + /** @ingroup ALGO + * @brief Matrix x Matrix product. + * + * @tparam MA Any matrix type + * @tparam MB Any matrix type + * @param ma Matrix. + * @param mb Matrix. + * @return ma x mb matrix product + * + */ +template::value && + number_traits::Scalar>::is_fixed,bool>::type = true> +inline typename OutputMatrix::type dot(const MA&ma,const MB&mb) +{ + + typename OutputMatrix::type res; + auto BT = mb.transpose(); + + //using M = MatMult::type,MA,MB,typename OutputMatrix::type,decltype(BT)>; + _dot_m_m(ma,mb,res,BT,CURRENT_ARCH); + return(res); +} + +template::value && + number_traits::Scalar>::is_float,bool>::type = true> +inline typename OutputMatrix::type dot(const MA&ma,const MB&mb) +{ + + typename OutputMatrix::type res; + + //using M = MatMult::type,MA,MB,typename OutputMatrix::type,decltype(BT)>; + _dot_m_m(ma,mb,res,CURRENT_ARCH); + return(res); +} + +template::value && + number_traits::Scalar>::is_fixed,bool>::type = true> +inline typename OutputMatrix::type dot(const MA&ma,const MB&mb) +{ + typename OutputMatrix::type res(ma.rows(),mb.columns()); + auto BT = mb.transpose(); + + //using M = MatMult::type,MA,MB,typename OutputMatrix::type,decltype(BT)>; + 
_dot_m_m(ma,mb,res,BT,CURRENT_ARCH); + return(res); +} + +template::value && + number_traits::Scalar>::is_float,bool>::type = true> +inline typename OutputMatrix::type dot(const MA&ma,const MB&mb) +{ + typename OutputMatrix::type res(ma.rows(),mb.columns()); + + //using M = MatMult::type,MA,MB,typename OutputMatrix::type,decltype(BT)>; + _dot_m_m(ma,mb,res,CURRENT_ARCH); + return(res); +} + + /** @ingroup ALGO + * @brief Matrix x Matrix product + * + * @tparam MA Any matrix type + * @tparam MB Any matrix type + * @tparam RES Any matrix type + * @param res Output matrix. Result of ma x mb is written to this argument + * @param ma Matrix. + * @param mb Matrix. + * + * Used in dynamic mode (dimension of matrix not know at build time) + * to avoid a memory allocation if the result matrix is already available + * (Enable to reuse the same matrix storage for the result in some algorithms) + * + */ +template::value && + number_traits::Scalar>::is_float,bool>::type = true> +inline void dot(RES &&res,const MA&ma,const MB&mb) +{ + //typename OutputMatrix::type res(ma.rows(),mb.columns()); + + //using M = MatMult::type,MA,MB,typename OutputMatrix::type,decltype(BT)>; + _dot_m_m(ma,mb,std::forward(res),CURRENT_ARCH); +} + +template::value && + number_traits::Scalar>::is_float,bool>::type = true> +inline typename OutputMatrix::type dot(const MA&ma,const MB&mb) +{ + typename OutputMatrix::type res(ma.rows(),mb.columns()); + + //using M = MatMult::type,MA,MB,typename OutputMatrix::type,decltype(mbt)>; + _dot_m_m(ma,mb,res,CURRENT_ARCH); + return(res); +} + +template::value && + number_traits::Scalar>::is_fixed,bool>::type = true> +inline typename OutputMatrix::type dot(const MA&ma,const MB&mb,const TMP &mbt) +{ + typename OutputMatrix::type res(ma.rows(),mb.columns()); + + //using M = MatMult::type,MA,MB,typename OutputMatrix::type,decltype(mbt)>; + _dot_m_m(ma,mb,res,mbt,CURRENT_ARCH); + return(res); +} + + + /** @ingroup ALGO + * @brief Create identity matrix + * + * @tparam P 
Datatype of matrix elements + * @param l Dimension of matrix (l x l) + * @return Identity matrix. It is a dynamic matrix (size not know at build time) + * + */ +template +Matrix mk_identity(const vector_length_t l) +{ + Matrix res(l,l); + _identity(res,l); + return(res); +}; + + + /** @ingroup ALGO + * @brief Create identity matrix + * + * @tparam P Datatype of matrix elements + * @tparam L Matrix dimension (L x L) + * @return Identity matrix. It is a static matrix : size known at build time. + * + */ +template +Matrix mk_identity() +{ + Matrix res; + _identity(res,L); + return(res); +}; + +} diff --git a/dsppp/Include/dsppp/arch.hpp b/dsppp/Include/dsppp/arch.hpp new file mode 100644 index 00000000..7326ba18 --- /dev/null +++ b/dsppp/Include/dsppp/arch.hpp @@ -0,0 +1,64 @@ +// -*- C++ -*- +/** @file */ +#pragma once + + +namespace arm_cmsis_dsp { + +/** \addtogroup ARCH Architecture detection + * \ingroup DSPPP + * @{ + */ + +/** + * Scalar architecture + */ +class Scalar {}; + +/** + * Architecture supporting DSP extensions + */ +class DSP:public Scalar {}; + +/** + * v8.1M Architecture + */ +class Helium:public DSP {}; + +/** + * v8.2M Architecture + */ +class Helium82:public Helium {}; + +/** + * Architecture supporting Neon + */ +class Neon:public Scalar {}; + +/*! 
@} */ + +} + +#include "arch_detection.hpp" + + +#if defined(ARM_MATH_MVEI) || defined(ARM_MATH_MVEF) +#define ARCH Helium82 +#elif defined(ARM_MATH_DSP) +#define ARCH DSP +#elif defined(ARM_MATH_NEON) +#define ARCH Neon +#else +#define ARCH Scalar +#endif + +#define CURRENT_ARCH (ARCH*)nullptr + +#if defined(ARM_MATH_MVEI) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_DSP) || defined(ARM_MATH_NEON) +#define HAS_VECTOR +#endif + +#if defined(ARM_MATH_MVEI) || defined(ARM_MATH_MVEF) +#define HAS_PREDICATED_LOOP +#endif + diff --git a/dsppp/Include/dsppp/arch_detection.hpp b/dsppp/Include/dsppp/arch_detection.hpp new file mode 100644 index 00000000..d8194b40 --- /dev/null +++ b/dsppp/Include/dsppp/arch_detection.hpp @@ -0,0 +1,281 @@ +// -*- C++ -*- + +#pragma once + + +#ifdef __cplusplus +extern "C" +{ +#endif + +/* Compiler specific diagnostic adjustment */ +#if defined ( __CC_ARM ) + +#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) + +#elif defined ( __APPLE_CC__ ) + #pragma GCC diagnostic ignored "-Wold-style-cast" + +#elif defined ( __GNUC__ ) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wsign-conversion" + #pragma GCC diagnostic ignored "-Wconversion" + #pragma GCC diagnostic ignored "-Wunused-parameter" + #define GCC_COMPILER + +#elif defined ( __ICCARM__ ) + +#elif defined ( __TI_ARM__ ) + +#elif defined ( __CSMC__ ) + +#elif defined ( __TASKING__ ) + +#elif defined ( _MSC_VER ) + +#else + #error Unknown compiler +#endif + + +/* Included for instrinsics definitions */ +#if defined (_MSC_VER ) +#include +#define __STATIC_FORCEINLINE static __forceinline +#define __STATIC_INLINE static __inline +#define __ALIGNED(x) __declspec(align(x)) +#define __WEAK +#elif defined ( __APPLE_CC__ ) +#include +#define __ALIGNED(x) __attribute__((aligned(x))) +#define __STATIC_FORCEINLINE static inline __attribute__((always_inline)) +#define __STATIC_INLINE static inline +#define __WEAK +#elif defined (__GNUC_PYTHON__) +#include +#define 
__ALIGNED(x) __attribute__((aligned(x))) +#define __STATIC_FORCEINLINE static inline __attribute__((always_inline)) +#define __STATIC_INLINE static inline +#define __WEAK +#else +#include "cmsis_compiler.h" +#endif + + + +#include +#include +#include +#include + +/* evaluate ARM DSP feature */ +#if (defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1)) + #define ARM_MATH_DSP 1 +#endif + +#if defined(ARM_MATH_NEON) + #if defined(_MSC_VER) && defined(_M_ARM64EC) + #include + #else + #include + #endif + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + #if !defined(ARM_MATH_NEON_FLOAT16) + #define ARM_MATH_NEON_FLOAT16 + #endif + #endif +#endif + +#if !defined(ARM_MATH_AUTOVECTORIZE) + + +#if defined(__ARM_FEATURE_MVE) +#if __ARM_FEATURE_MVE + #if !defined(ARM_MATH_MVEI) + #define ARM_MATH_MVEI + #endif +#endif + +#if (__ARM_FEATURE_MVE & 2) + #if !defined(ARM_MATH_MVEF) + #define ARM_MATH_MVEF + #endif + #if !defined(ARM_MATH_MVE_FLOAT16) + #define ARM_MATH_MVE_FLOAT16 + #endif +#endif + +#endif /*defined(__ARM_FEATURE_MVE)*/ +#endif /*!defined(ARM_MATH_AUTOVECTORIZE)*/ + + +#if defined (ARM_MATH_HELIUM) + #if !defined(ARM_MATH_MVEF) + #define ARM_MATH_MVEF + #endif + + #if !defined(ARM_MATH_MVEI) + #define ARM_MATH_MVEI + #endif + + #if !defined(ARM_MATH_MVE_FLOAT16) + #define ARM_MATH_MVE_FLOAT16 + #endif +#endif + + + +#if defined ( __CC_ARM ) + /* Enter low optimization region - place directly above function definition */ + #if defined( __ARM_ARCH_7EM__ ) + #define LOW_OPTIMIZATION_ENTER \ + _Pragma ("push") \ + _Pragma ("O1") + #else + #define LOW_OPTIMIZATION_ENTER + #endif + + /* Exit low optimization region - place directly after end of function definition */ + #if defined ( __ARM_ARCH_7EM__ ) + #define LOW_OPTIMIZATION_EXIT \ + _Pragma ("pop") + #else + #define LOW_OPTIMIZATION_EXIT + #endif + + /* Enter low optimization region - place directly above function definition */ + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER 
+ + /* Exit low optimization region - place directly after end of function definition */ + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined (__ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __APPLE_CC__ ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __GNUC__ ) + #define LOW_OPTIMIZATION_ENTER \ + __attribute__(( optimize("-O1") )) + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __ICCARM__ ) + /* Enter low optimization region - place directly above function definition */ + #if defined ( __ARM_ARCH_7EM__ ) + #define LOW_OPTIMIZATION_ENTER \ + _Pragma ("optimize=low") + #else + #define LOW_OPTIMIZATION_ENTER + #endif + + /* Exit low optimization region - place directly after end of function definition */ + #define LOW_OPTIMIZATION_EXIT + + /* Enter low optimization region - place directly above function definition */ + #if defined ( __ARM_ARCH_7EM__ ) + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER \ + _Pragma ("optimize=low") + #else + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #endif + + /* Exit low optimization region - place directly after end of function definition */ + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __TI_ARM__ ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __CSMC__ ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __TASKING__ ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define 
IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT +#endif + + + +/* Compiler specific diagnostic adjustment */ +#if defined ( __CC_ARM ) + +#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) + +#elif defined ( __APPLE_CC__ ) + +#elif defined ( __GNUC__ ) +#pragma GCC diagnostic pop + +#elif defined ( __ICCARM__ ) + +#elif defined ( __TI_ARM__ ) + +#elif defined ( __CSMC__ ) + +#elif defined ( __TASKING__ ) + +#elif defined ( _MSC_VER ) + +#else + #error Unknown compiler +#endif + +#ifdef __cplusplus +} +#endif + +#if defined(__ARM_FEATURE_MVE) && __ARM_FEATURE_MVE +#include +#endif + +#if !(__ARM_FEATURE_MVE & 2) + #if !defined(DISABLEFLOAT16) + #if defined(__ARM_FP16_FORMAT_IEEE) || defined(__ARM_FP16_FORMAT_ALTERNATIVE) + typedef __fp16 float16_t; + #define ARM_FLOAT16_SUPPORTED + #endif + #endif +#else + /* When Vector float16, this flag is always defined and can't be disabled */ + #define ARM_FLOAT16_SUPPORTED +#endif + +#if defined(ARM_FLOAT16_SUPPORTED) + +#if defined(__ICCARM__) + +#define F16INFINITY ((float16_t) INFINITY) + +#else + +#define F16INFINITY ((float16_t)__builtin_inf()) + +#endif + +#endif + + + + diff --git a/dsppp/Include/dsppp/common.hpp b/dsppp/Include/dsppp/common.hpp new file mode 100644 index 00000000..5ade21ef --- /dev/null +++ b/dsppp/Include/dsppp/common.hpp @@ -0,0 +1,79 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#include +#include + +// For compiler detection +#include "arch.hpp" + + +#define ARM_COMPUTE_DISABLE_UNROLL +// For loop (not for fusion unrolling functions) +#define MEMORY_POOL_ALIGNMENT 128 +//#define MEMORY_ALLOCATION_DEBUG + +// TMP_ALLOC must be defined to use the library +// It is generally defined in an external header not +// part of the library. 
+// By default it is using the malloc allocator + +#ifndef TMP_ALLOC +#define TMP_ALLOC malloc_allocator +#endif + +#if !defined(GCC_COMPILER) +// clang / AC6 +#if defined(ARM_COMPUTE_DISABLE_UNROLL) +#define UNROLL_LOOP _Pragma ("clang loop unroll(disable)") +#else +#define UNROLL_LOOP _Pragma("clang loop unroll_count(4)") +#endif + +#define DISABLE_LOOP_UNROLL _Pragma("clang loop unroll(disable)") + +#else +// GCC +#define UNROLL_LOOP +#define DISABLE_LOOP_UNROLL +#endif + +namespace arm_cmsis_dsp { + +/** \addtogroup COMMON Common types and constants + * \ingroup DSPPP + * @{ + */ + //! Dynamic objects (dimensions only known at runtime) + constexpr int DYNAMIC = -1; + + //! Dynamic objects (dimensions only known at runtime) but with some constraints (like stride == nb_cols) + constexpr int CONSTRAINED_DYNAMIC = -2; + + //! index datatype. It must be a signed datatype + typedef int32_t index_t; + //! Vector length datatype. Iy must be a signed datatype. + typedef int32_t vector_length_t; + +/*! @} */ + +/** \addtogroup DEBUG Tools for debugging + * \ingroup DSPPP + * @{ + */ + +/** + * @brief Prints a textual representation of a type. + * + * @tparam T The datatype to display + */ +template +void PrintType(void) +{ + //T t; + std::cout << __PRETTY_FUNCTION__ << "\r\n"; +}; + +/*! @} */ +} diff --git a/dsppp/Include/dsppp/fixed_point.hpp b/dsppp/Include/dsppp/fixed_point.hpp new file mode 100644 index 00000000..cbb791d1 --- /dev/null +++ b/dsppp/Include/dsppp/fixed_point.hpp @@ -0,0 +1,2303 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#include +#include "arch.hpp" +#include + +#include +#include +namespace arm_cmsis_dsp { + +/** \addtogroup FIXED Fixed point datatypes + * \ingroup DSPPP + * @{ + */ + +/* + +Normally those kind of definitions are in a compiler file +in Core or Core_A. + +But for MSVC compiler it is a bit special. The goal is very specific +to CMSIS-DSP and only to allow the use of this library from other +systems like Python or Matlab. 
+ +MSVC is not going to be used to cross-compile to ARM. So, having a MSVC +compiler file in Core or Core_A would not make sense. + +*/ +#if defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) || defined(__APPLE_CC__) +__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data) +{ + if (data == 0U) { return 32U; } + + uint32_t count = 0U; + uint32_t mask = 0x80000000U; + + while ((data & mask) == 0U) + { + count += 1U; + mask = mask >> 1U; + } + return count; +} + +__STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat) +{ + if ((sat >= 1U) && (sat <= 32U)) + { + const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U); + const int32_t min = -1 - max ; + if (val > max) + { + return max; + } + else if (val < min) + { + return min; + } + } + return val; +} + +__STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat) +{ + if (sat <= 31U) + { + const uint32_t max = ((1U << sat) - 1U); + if (val > (int32_t)max) + { + return max; + } + else if (val < 0) + { + return 0U; + } + } + return (uint32_t)val; +} +#endif + +#if !defined(ARM_MATH_DSP) +__STATIC_FORCEINLINE int32_t clip_int64_to_q31( + int64_t x) + { + return ((int32_t) (x >> 32) != ((int32_t) x >> 31)) ? 
+ ((0x7FFFFFFF ^ ((int32_t) (x >> 63)))) : (int32_t) x; + } + +__STATIC_FORCEINLINE int32_t __QADD( + int32_t x, + int32_t y) + { + return ((int32_t)(clip_int64_to_q31((int64_t)x + (int32_t)y))); + } + + + /* + * @brief C custom defined QSUB + */ + __STATIC_FORCEINLINE int32_t __QSUB( + int32_t x, + int32_t y) + { + return ((int32_t)(clip_int64_to_q31((int64_t)x - (int32_t)y))); + } + + +#endif + +/** + * @brief Function to identify the template for fixed number + * representable on 64 bits + * + * @param M number of mantissa bit (without sign bit) + * @param F number of fractional bits + * @param S sign or unsigned + * @return True if the template must be selected + */ +constexpr bool test64(const int M,const int F,const int S){return((M+F+S)>32 && (M+F+S)<=64);} + +/** + * @brief Function to identify the template for fixed number + * representable on 32 bits + * + * @param M number of mantissa bit (without sign bit) + * @param F number of fractional bits + * @param S sign or unsigned + * @return True if the template must be selected + */ +constexpr bool test32(const int M,const int F,const int S){return((M+F+S)>16 && (M+F+S)<=32);} + +/** + * @brief Function to identify the template for fixed number + * representable on 16 bits + * + * @param M number of mantissa bit (without sign bit) + * @param F number of fractional bits + * @param S sign or unsigned + * @return True if the template must be selected + */ +constexpr bool test16(const int M,const int F,const int S){return((M+F+S)>8 && (M+F+S)<=16);} + +/** + * @brief Function to identify the template for fixed number + * representable on 8 bits + * + * @param M number of mantissa bit (without sign bit) + * @param F number of fractional bits + * @param S sign or unsigned + * @return True if the template must be selected + */ +constexpr bool test8 (const int M,const int F,const int S){return((M+F+S)<=8);} + +/** + * @brief Storage type for a fixed point number + * + * @tparam M Number of bits for mantissa (without 
sign bit) + * @tparam F Number of bits for fractional part + * @tparam s signed or unsigned + * + */ +template +struct fixed_storage_type +{ +}; + + +/** + * @brief Storage type for a fixed point number representable on int64 + * + * @tparam M Number of bits for mantissa (without sign bit) + * @tparam F Number of bits for fractional part + * + */ +template +struct fixed_storage_type +{ + //! Storage for the fixed point number + typedef int64_t value_type; + //! Storage for the widening of this fixed point number datatype + typedef int64_t wider_type; + //! Storage for the narrowing of this fixed point number datatype + typedef int32_t narrow_type; +}; + +/** + * @brief Storage type for a fixed point number representable on uint64 + * + * @tparam M Number of bits for mantissa (without sign bit) + * @tparam F Number of bits for fractional part + * + */ +template +struct fixed_storage_type +{ + //! Storage for the fixed point number + typedef uint64_t value_type; + //! Storage for the widening of this fixed point number datatype + typedef uint64_t wider_type; + //! Storage for the narrowing of this fixed point number datatype + typedef uint32_t narrow_type; +}; + + +/** + * @brief Storage type for a fixed point number representable on int32 + * + * @tparam M Number of bits for mantissa (without sign bit) + * @tparam F Number of bits for fractional part + * + */ +template +struct fixed_storage_type +{ + //! Storage for the fixed point number + typedef int32_t value_type; + //! Storage for the widening of this fixed point number datatype + typedef int64_t wider_type; + //! Storage for the narrowing of this fixed point number datatype + typedef int16_t narrow_type; +}; + +/** + * @brief Storage type for a fixed point number representable on uint32 + * + * @tparam M Number of bits for mantissa (without sign bit) + * @tparam F Number of bits for fractional part + * + */ +template +struct fixed_storage_type +{ + //! 
Storage for the fixed point number + typedef uint32_t value_type; + //! Storage for the widening of this fixed point number datatype + typedef uint64_t wider_type; + //! Storage for the narrowing of this fixed point number datatype + typedef uint16_t narrow_type; +}; + + +/** + * @brief Storage type for a fixed point number representable on int16 + * + * @tparam M Number of bits for mantissa (without sign bit) + * @tparam F Number of bits for fractional part + * + */ +template +struct fixed_storage_type +{ + //! Storage for the fixed point number + typedef int16_t value_type; + //! Storage for the widening of this fixed point number datatype + typedef int32_t wider_type; + //! Storage for the narrowing of this fixed point number datatype + typedef int8_t narrow_type; +}; + +/** + * @brief Storage type for a fixed point number representable on uint16 + * + * @tparam M Number of bits for mantissa (without sign bit) + * @tparam F Number of bits for fractional part + * + */ +template +struct fixed_storage_type +{ + //! Storage for the fixed point number + typedef uint16_t value_type; + //! Storage for the widening of this fixed point number datatype + typedef uint32_t wider_type; + //! Storage for the narrowing of this fixed point number datatype + typedef uint8_t narrow_type; +}; + +/** + * @brief Storage type for a fixed point number representable on int8 + * + * @tparam M Number of bits for mantissa (without sign bit) + * @tparam F Number of bits for fractional part + * + */ +template +struct fixed_storage_type +{ + //! Storage for the fixed point number + typedef int8_t value_type; + //! Storage for the widening of this fixed point number datatype + typedef int16_t wider_type; + //! 
Storage for the narrowing of this fixed point number datatype + typedef int8_t narrow_type; +}; + +/** + * @brief Storage type for a fixed point number representable on uint8 + * + * @tparam M Number of bits for mantissa (without sign bit) + * @tparam F Number of bits for fractional part + * + */ +template +struct fixed_storage_type +{ + //! Storage for the fixed point number + typedef uint8_t value_type; + //! Storage for the widening of this fixed point number datatype + typedef uint16_t wider_type; + //! Storage for the narrowing of this fixed point number datatype + typedef uint8_t narrow_type; +}; + + +/** + * @brief Fixed point template + * + * @tparam M Number of bits for mantissa (without sign bit)# + * @tparam F Number of bits for fractional part + * @tparam S Signed or unsigned + * @tparam T storage datatype + * + */ +template::value_type> +struct Q {}; + +/** + * @brief Signed fixed point datatypes on 64 bits + * + * @tparam M Number of bits for mantissa (without sign bit) + * @tparam F Number of bits for fractional part + * + */ +template +struct Q { + //! Number of fractional bits + constexpr static int fracBits = F; + //! Number of mantissa bits (without sign bit) + constexpr static int mantissaBits = M; + //! Is this number representation signed + constexpr static bool isSigned = true; + //! Storage type for the value + using value_type = typename fixed_storage_type::value_type; + //! Storage type for the widening of the value + using wider_type = typename fixed_storage_type::wider_type; + + //! Maximum representable positive value + constexpr static value_type maxVal = 0x7FFFFFFFFFFFFFFFLL; + + //! Minimum representable negative value + constexpr static value_type minVal = 0x8000000000000000LL; + + /** + * @brief Convert a float to fixed point + * + * @param f float value + * @return the fixed point value in the storage type + * + */ + constexpr static value_type convert(const float f) { + return(f >= 1.0f ? maxVal : (f <= -1.0f ? 
minVal : value_type(f * (float)((maxVal >> (63 - F)) )))); + }; + + //! Storage value + value_type v; + + /** + * @brief Create a new zero fixed point + */ + constexpr Q():v(0){}; + + /** + * @brief Create a new fixed point from a raw integer + * @param x the raw integer + */ + constexpr explicit Q(const value_type x):v(x){}; + + /** + * @brief Create a new fixed point from a float + * @param x the float + * @return The fixed point representing the float value with saturation + */ + constexpr static Q f(const float x){return Q(convert(x));} + + /** + * @brief Fixed point number representing 1 + * @return Fixed point representing 1 + */ + constexpr static Q one() {return f(1.0f);}; + + Q(Q&& other)=default; + Q(const Q& other)=default; + Q& operator=(Q&& other)=default; + Q& operator=(const Q& other)=default; + + + /** + * @brief Convert an unsigned fixed point to this fixed point + * @param other The unsigned fixed point number + * + * Some problem may occur since the unsigned may not be representable + * with the less bits required for the sign representation. + * This convertion is not saturating. 
+ */ + constexpr explicit Q(const Q&other) + :v{value_type(other.v)} {}; + + /** + * @brief this == b + * @param b the other fixed point + * @return true if this == b + */ + bool operator==(const Q& b) const + { + return(v == b.v); + } + + /** + * @brief this != b + * @param b the other fixed point + * @return true if this != b + */ + bool operator!=(const Q& b) const + { + return(v != b.v); + } + + /** + * @brief this < b + * @param b the other fixed point + * @return true if this < b + */ + bool operator<(const Q& b) const + { + return(v < b.v); + } + + /** + * @brief this > b + * @param b the other fixed point + * @return true if this > b + */ + bool operator>(const Q& b) const + { + return(v > b.v); + } + + /** + * @brief this <= b + * @param b the other fixed point + * @return true if this <= b + */ + bool operator<=(const Q& b) const + { + return(v <= b.v); + } + + /** + * @brief this >= b + * @param b the other fixed point + * @return true if this >= b + */ + bool operator>=(const Q& b) const + { + return(v >= b.v); + } + + + /** + * @brief this += other + * @param other the other fixed point + * @return true if this += other + */ + Q & operator+=(const Q other) + { + v += other.v; + return(*this); + } + + /** + * @brief this -= other + * @param other the other fixed point + * @return true if this += other + */ + Q & operator-=(const Q other) + { + v -= other.v; + return(*this); + } + + + /** + * @brief Display fixed point number for debug purpose + * @param stream Output stream + * @param other The fixed point to display + * @return the stream + * + */ + friend std::ostream& operator<< (std::ostream& stream, const Q& other) { + stream << double(1.0*other.v / (maxVal >> (63 - F))) << "_Q(" << M << "," << F << ")";; + return(stream); + } + +}; + +/** + * @brief Unsigned fixed point datatypes on 64 bits + * + * @tparam M Number of bits for mantissa (without sign bit) + * @tparam F Number of bits for fractional part + * + */ +template +struct Q { + //! 
Number of fractional bits + constexpr static int fracBits = F; + //! Number of mantissa bits (without sign bit) + constexpr static int mantissaBits = M; + //! Is this number representation signed + constexpr static bool isSigned = false; + //! Storage type for the value + using value_type = typename fixed_storage_type::value_type; + //! Storage type for the widening of the value + using wider_type = typename fixed_storage_type::wider_type; + //! Maximum representable positive value + constexpr static value_type maxVal = 0xFFFFFFFFFFFFFFFFLL; + + /** + * @brief Convert a float to fixed point + * + * @param f float value + * @return the fixed point value in the storage type + * + */ + constexpr static value_type convert(const float f) { + return(f >= 1.0f ? maxVal : (f <= 0.0f ? 0LL : value_type(f * (float)((maxVal >> (64 - F)))))); + }; + + //! Storage value + value_type v; + + /** + * @brief Create a new zero fixed point + */ + constexpr Q():v(0){}; + + /** + * @brief Create a new fixed point from a raw integer + * @param x the raw integer + */ + constexpr explicit Q(const value_type x):v(x){}; + + /** + * @brief Create a new fixed point from a float + * @param x the float + * @return The fixed point representing the float value with saturation + */ + constexpr static Q f(const float x){return Q(convert(x));} + + /** + * @brief Fixed point number representing 1 + * @return Fixed point representing 1 + */ + constexpr static Q one() {return f(1.0f);}; + + Q(Q&& other)=default; + Q(const Q& other)=default; + Q& operator=(Q&& other)=default; + Q& operator=(const Q& other)=default; + + /** + * @brief Display fixed point number for debug purpose + * @param stream Output stream + * @param other The fixed point to display + * @return the stream + * + */ + friend std::ostream& operator<< (std::ostream& stream, const Q& other) { + stream << double(1.0*other.v / (maxVal >> (64 - F))) << "_UQ(" << M << "," << F << ")";; + return(stream); + } + + /** + * @brief this == b + * 
@param b the other fixed point + * @return true if this == b + */ + bool operator==(const Q& b) const + { + return(v == b.v); + } + + /** + * @brief this != b + * @param b the other fixed point + * @return true if this != b + */ + bool operator!=(const Q& b) const + { + return(v != b.v); + } + + /** + * @brief this < b + * @param b the other fixed point + * @return true if this < b + */ + bool operator<(const Q& b) const + { + return(v < b.v); + } + + /** + * @brief this > b + * @param b the other fixed point + * @return true if this > b + */ + bool operator>(const Q& b) const + { + return(v > b.v); + } + + /** + * @brief this <= b + * @param b the other fixed point + * @return true if this <= b + */ + bool operator<=(const Q& b) const + { + return(v <= b.v); + } + + /** + * @brief this >= b + * @param b the other fixed point + * @return true if this >= b + */ + bool operator>=(const Q& b) const + { + return(v >= b.v); + } + +}; + +/** + * @brief Signed fixed point datatypes on 32 bits + * + * @tparam M Number of bits for mantissa (without sign bit) + * @tparam F Number of bits for fractional part + * + */ +template +struct Q { + //! Number of fractional bits + constexpr static int fracBits = F; + + //! Number of mantissa bits (without sign bit) + constexpr static int mantissaBits = M; + + //! Is this number representation signed + constexpr static bool isSigned = true; + + //! Storage type for the value + using value_type = typename fixed_storage_type::value_type; + + //! Storage type for the widening of the value + using wider_type = typename fixed_storage_type::wider_type; + + //! Maximum representable positive value + constexpr static value_type maxVal = 0x7FFFFFFFL; + + //! 
Minimum representable negative value + constexpr static value_type minVal = 0x80000000L; + + /** + * @brief Saturate a wider type to the current type + * + * @param i the wider integer type + * @return the saturated value + * + */ + constexpr static value_type sat(const wider_type i) { + return (i > (value_type)maxVal ? maxVal : (i<(value_type)minVal ? minVal : i)); + }; + + /** + * @brief Convert a float to fixed point with saturation + * + * @param f float value + * @return the fixed point value in the storage type + * + */ + constexpr static value_type convert(const float f) { + return(f >= 1.0f ? maxVal : (f <= -1.0f ? minVal : value_type(f * (float)((wider_type)1<&other): + v{value_type(other.v)} {}; + + /** + * @brief this == b + * @param b the other fixed point + * @return true if this == b + */ + bool operator==(const Q& b) const + { + return(v == b.v); + } + + /** + * @brief this != b + * @param b the other fixed point + * @return true if this != b + */ + bool operator!=(const Q& b) const + { + return(v != b.v); + } + + /** + * @brief this < b + * @param b the other fixed point + * @return true if this < b + */ + bool operator<(const Q& b) const + { + return(v < b.v); + } + + /** + * @brief this > b + * @param b the other fixed point + * @return true if this > b + */ + bool operator>(const Q& b) const + { + return(v > b.v); + } + + /** + * @brief this <= b + * @param b the other fixed point + * @return true if this <= b + */ + bool operator<=(const Q& b) const + { + return(v <= b.v); + } + + /** + * @brief this >= b + * @param b the other fixed point + * @return true if this >= b + */ + bool operator>=(const Q& b) const + { + return(v >= b.v); + } + + /** + * @brief this += other + * @param other the other fixed point + * @return true if this += other + */ + Q & operator+=(const Q other) + { + v = __QADD(v,other.v); + return(*this); + } + + + /** + * @brief this -= other + * @param other the other fixed point + * @return true if this += other + */ + Q & 
operator-=(const Q other) + { + v = __QSUB(v,other.v); + return(*this); + } + + /** + * @brief Display fixed point number for debug purpose + * @param stream Output stream + * @param other The fixed point to display + * @return the stream + * + */ + friend std::ostream& operator<< (std::ostream& stream, const Q& other) { + stream << double(1.0*other.v / ((wider_type)1< +struct Q { + //! Number of fractional bits + constexpr static int fracBits = F; + //! Number of mantissa bits (without sign bit) + constexpr static int mantissaBits = M; + //! Is this number representation signed + constexpr static bool isSigned = false; + //! Storage type for the value + using value_type = typename fixed_storage_type::value_type; + //! Storage type for the widening of the value + using wider_type = typename fixed_storage_type::wider_type; + + //! Maximum representable positive value + constexpr static value_type maxVal = 0xFFFFFFFFL; + + /** + * @brief Saturate a wider type to the current type + * + * @param i the wider integer type + * @return the saturated value + * + */ + constexpr static value_type sat(const wider_type i) { + return (i > (value_type)maxVal ? maxVal : i); + }; + + /** + * @brief Convert a float to fixed point with saturation + * + * @param f float value + * @return the fixed point value in the storage type + * + */ + constexpr static value_type convert(const float f) { + return(f >= 1.0f ? maxVal : (f <= 0.0f ? 
0 : value_type(f * (float)((wider_type)1< b + * @param b the other fixed point + * @return true if this > b + */ + bool operator>(const Q& b) const + { + return(v > b.v); + } + + /** + * @brief this <= b + * @param b the other fixed point + * @return true if this <= b + */ + bool operator<=(const Q& b) const + { + return(v <= b.v); + } + + /** + * @brief this >= b + * @param b the other fixed point + * @return true if this >= b + */ + bool operator>=(const Q& b) const + { + return(v >= b.v); + } +}; + +/** + * @brief Signed fixed point datatypes on 16 bits + * + * @tparam M Number of bits for mantissa (without sign bit) + * @tparam F Number of bits for fractional part + * + */ +template +struct Q { + //! Number of fractional bits + constexpr static int fracBits = F; + + //! Number of mantissa bits (without sign bit) + constexpr static int mantissaBits = M; + + //! Is this number representation signed + constexpr static bool isSigned = true; + + //! Storage type for the value + using value_type = typename fixed_storage_type::value_type; + + //! Storage type for the widening of the value + using wider_type = typename fixed_storage_type::wider_type; + + //! Maximum representable positive value + constexpr static value_type maxVal = 0x7FFF; + + //! Minimum representable negative value + constexpr static value_type minVal = 0x8000; + + /** + * @brief Saturate a wider type to the current type + * + * @param i the wider integer type + * @return the saturated value + * + */ + constexpr static value_type sat(const wider_type i) { + return (i > (value_type)maxVal ? maxVal : (i<(value_type)minVal ? minVal : i)); + }; + + /** + * @brief Convert a float to fixed point with saturation + * + * @param f float value + * @return the fixed point value in the storage type + * + */ + constexpr static value_type convert(const float f) { + return(f >= 1.0f ? maxVal : (f <= -1.0f ? 
minVal : value_type(f * (float)((wider_type)1<&other):v{value_type(other.v)} {}; + + /** + * @brief this == b + * @param b the other fixed point + * @return true if this == b + */ + bool operator==(const Q& b) const + { + return(v == b.v); + } + + /** + * @brief this != b + * @param b the other fixed point + * @return true if this != b + */ + bool operator!=(const Q& b) const + { + return(v != b.v); + } + + /** + * @brief this < b + * @param b the other fixed point + * @return true if this < b + */ + bool operator<(const Q& b) const + { + return(v < b.v); + } + + /** + * @brief this > b + * @param b the other fixed point + * @return true if this > b + */ + bool operator>(const Q& b) const + { + return(v > b.v); + } + + /** + * @brief this <= b + * @param b the other fixed point + * @return true if this <= b + */ + bool operator<=(const Q& b) const + { + return(v <= b.v); + } + + /** + * @brief this >= b + * @param b the other fixed point + * @return true if this >= b + */ + bool operator>=(const Q& b) const + { + return(v >= b.v); + } + + /** + * @brief this += other + * @param other the other fixed point + * @return true if this += other + */ + Q & operator+=(const Q other) + { + #if !defined(ARM_MATH_DSP) + v = __SSAT((value_type)v + other.v,16); + #else + v = (value_type) __QADD16(v, other.v); + #endif + return(*this); + } + + /** + * @brief this -= other + * @param other the other fixed point + * @return true if this += other + */ + Q & operator-=(const Q other) + { + #if !defined(ARM_MATH_DSP) + v = __SSAT((value_type)v - other.v,16); + #else + v = (value_type) __QSUB16(v, other.v); + #endif + return(*this); + } + + /** + * @brief Display fixed point number for debug purpose + * @param stream Output stream + * @param other The fixed point to display + * @return the stream + * + */ + friend std::ostream& operator<< (std::ostream& stream, const Q& other) { + stream << double(1.0*other.v / (((wider_type)1)< +struct Q { + //! 
Number of fractional bits + constexpr static int fracBits = F; + //! Number of mantissa bits (without sign bit) + constexpr static int mantissaBits = M; + //! Is this number representation signed + constexpr static bool isSigned = false; + //! Storage type for the value + using value_type = typename fixed_storage_type::value_type; + //! Storage type for the widening of the value + using wider_type = typename fixed_storage_type::wider_type; + //! Maximum representable positive value + constexpr static value_type maxVal = 0xFFFF; + + /** + * @brief Saturate a wider type to the current type + * + * @param i the wider integer type + * @return the saturated value + * + */ + constexpr static value_type sat(const wider_type i) { + return (i > (value_type)maxVal ? maxVal : i); + }; + + /** + * @brief Convert a float to fixed point with saturation + * + * @param f float value + * @return the fixed point value in the storage type + * + */ + constexpr static value_type convert(const float f) { + return(f >= 1.0f ? maxVal : (f <= 0.0f ? 
0 : value_type(f * (float)((wider_type)1< b + * @param b the other fixed point + * @return true if this > b + */ + bool operator>(const Q& b) const + { + return(v > b.v); + } + + /** + * @brief this <= b + * @param b the other fixed point + * @return true if this <= b + */ + bool operator<=(const Q& b) const + { + return(v <= b.v); + } + + /** + * @brief this >= b + * @param b the other fixed point + * @return true if this >= b + */ + bool operator>=(const Q& b) const + { + return(v >= b.v); + } + + /** + * @brief this += other + * @param other the other fixed point + * @return true if this += other + */ + Q & operator+=(const Q other) + { + v = __USAT((value_type)v + other.v,16); + return(*this); + } + + + /** + * @brief Display fixed point number for debug purpose + * @param stream Output stream + * @param other The fixed point to display + * @return the stream + * + */ + friend std::ostream& operator<< (std::ostream& stream, const Q& other) { + stream << double(1.0*other.v / ((wider_type)1< +struct Q { + //! Number of fractional bits + constexpr static int fracBits = F; + //! Number of mantissa bits (without sign bit) + constexpr static int mantissaBits = M; + //! Is this number representation signed + constexpr static bool isSigned = true; + //! Storage type for the value + using value_type = typename fixed_storage_type::value_type; + //! Storage type for the widening of the value + using wider_type = typename fixed_storage_type::wider_type; + //! Maximum representable positive value + constexpr static value_type maxVal = 0x7F; + //! Minimum representable negative value + constexpr static value_type minVal = 0x80; + + /** + * @brief Saturate a wider type to the current type + * + * @param i the wider integer type + * @return the saturated value + * + */ + constexpr static value_type sat(const wider_type i) { + return (i > (value_type)maxVal ? maxVal : (i<(value_type)minVal ? 
minVal : i)); + }; + + + /** + * @brief Convert a float to fixed point with saturation + * + * @param f float value + * @return the fixed point value in the storage type + * + */ + constexpr static value_type convert(const float f) { + return(f >= 1.0f ? maxVal : (f <= -1.0f ? minVal : value_type(f * (float)((wider_type)1<&other):v{value_type(other.v)} {}; + + /** + * @brief this == b + * @param b the other fixed point + * @return true if this == b + */ + bool operator==(const Q& b) const + { + return(v == b.v); + } + + /** + * @brief this != b + * @param b the other fixed point + * @return true if this != b + */ + bool operator!=(const Q& b) const + { + return(v != b.v); + } + + /** + * @brief this < b + * @param b the other fixed point + * @return true if this < b + */ + bool operator<(const Q& b) const + { + return(v < b.v); + } + + /** + * @brief this > b + * @param b the other fixed point + * @return true if this > b + */ + bool operator>(const Q& b) const + { + return(v > b.v); + } + + /** + * @brief this <= b + * @param b the other fixed point + * @return true if this <= b + */ + bool operator<=(const Q& b) const + { + return(v <= b.v); + } + + /** + * @brief this >= b + * @param b the other fixed point + * @return true if this >= b + */ + bool operator>=(const Q& b) const + { + return(v >= b.v); + } + + /** + * @brief this += other + * @param other the other fixed point + * @return true if this += other + */ + Q & operator+=(const Q other) + { + #if !defined(ARM_MATH_DSP) + v = __SSAT((value_type)v + other.v,8); + #else + v = (value_type) __QADD8(v, other.v); + #endif + return(*this); + } + + /** + * @brief this -= other + * @param other the other fixed point + * @return true if this += other + */ + Q & operator-=(const Q other) + { + #if !defined(ARM_MATH_DSP) + v = __SSAT((value_type)v + other.v,8); + #else + v = (value_type) __QSUB8(v, other.v); + #endif + return(*this); + } + + /** + * @brief Display fixed point number for debug purpose + * @param 
stream Output stream + * @param other The fixed point to display + * @return the stream + * + */ + friend std::ostream& operator<< (std::ostream& stream, const Q& other) { + stream << double(1.0*other.v / ((wider_type)1< +struct Q { + //! Number of fractional bits + constexpr static int fracBits = F; + //! Number of mantissa bits (without sign bit) + constexpr static int mantissaBits = M; + //! Is this number representation signed + constexpr static bool isSigned = false; + //! Storage type for the value + using value_type = typename fixed_storage_type::value_type; + //! Storage type for the widening of the value + using wider_type = typename fixed_storage_type::wider_type; + + //! Maximum representable positive value + constexpr static value_type maxVal = 0xFF; + + /** + * @brief Saturate a wider type to the current type + * + * @param i the wider integer type + * @return the saturated value + * + */ + constexpr static value_type sat(const wider_type i) { + return (i > (value_type)maxVal ? maxVal : i); + }; + + /** + * @brief Convert a float to fixed point with saturation + * + * @param f float value + * @return the fixed point value in the storage type + * + */ + constexpr static value_type convert(const float f) { + return(f >= 1.0f ? maxVal : (f <= 0.0f ? 
0 : value_type(f * (float)((wider_type)1< b + * @param b the other fixed point + * @return true if this > b + */ + bool operator>(const Q& b) const + { + return(v > b.v); + } + + /** + * @brief this <= b + * @param b the other fixed point + * @return true if this <= b + */ + bool operator<=(const Q& b) const + { + return(v <= b.v); + } + + /** + * @brief this >= b + * @param b the other fixed point + * @return true if this >= b + */ + bool operator>=(const Q& b) const + { + return(v >= b.v); + } + + /** + * @brief this += other + * @param other the other fixed point + * @return true if this += other + */ + Q & operator+=(const Q other) + { + v = __USAT((value_type)v + other.v,8); + return(*this); + } + + /** + * @brief Display fixed point number for debug purpose + * @param stream Output stream + * @param other The fixed point to display + * @return the stream + * + */ + friend std::ostream& operator<< (std::ostream& stream, const Q& other) { + stream << double(1.0*other.v / ((wider_type)1<; + +//! Q31 datatype +using Q31 = Q<0,31>; + +//! Q15 datatype +using Q15 = Q<0,15>; + +//! Q7 datatype +using Q7 = Q<0,7>; + +/** + * @brief q63 literal + * @param x long double value + * @return Q63 value + * + * You can write + * \code{.cpp} + * Q63 b = 0.4_q63; + * \endcode + * + * The float is converted to Q63 at build time. + * + */ +constexpr Q63 operator ""_q63(long double x){return Q63(Q63::convert((float)x));} + +/** + * @brief q31 literal + * @param x long double value + * @return Q31 value + * + * You can write + * \code{.cpp} + * Q31 b = 0.4_q31; + * \endcode + * + * The float is converted to Q31 at build time. + * + */ +constexpr Q31 operator ""_q31(long double x){return Q31(Q31::convert((float)x));} + +/** + * @brief q15 literal + * @param x long double value + * @return Q15 value + * + * You can write + * \code{.cpp} + * Q15 b = 0.4_q15; + * \endcode + * + * The float is converted to Q15 at build time. 
+ * + */ +constexpr Q15 operator ""_q15(long double x){return Q15(Q15::convert((float)x));} + +/** + * @brief q7 literal + * @param x long double value + * @return Q7 value + * + * You can write + * \code{.cpp} + * Q7 b = 0.4_q7; + * \endcode + * + * The float is converted to Q7 at build time. + * + */ +constexpr Q7 operator ""_q7(long double x){return Q7(Q7::convert((float)x));} + + +/** + * @brief Multiplication of two fixed point numbers A and B + * @tparam MA Number of mantissa bits for A + * @tparam FA Number of fractional bits for A + * @tparam MB Number of mantissa bits for B + * @tparam FB Number of fractional bits for B + * @tparam SA Is A using a signed representation + * @tparam SB Is B using a signed representation + * @param a First fixed point number + * @param b Second fixed point number + * @return return the product of the two fixed point (and use adapted type) + * + * + */ +template +inline Q< MA+MB+1 , FA+FB,SA || SB> mult(const Q &a, + const Q &b) +{ + /* + + Why mantissa of result is MA + MB + 1. + If we take as example, Q7 * Q7 and we multiply + 0x80 * 0x80 (-128 * -128) we get 0x4000 and if we shift right by 7 + we get 0x080 (on 9 bits). If the additional mantissa bit was not + kept, we would get 0x80 (on 8 bits) which would mean a negative + number. + + Saturation of 0x080 (on 9 bits) will give 0x7F whereas + saturation of 0x80 (on 8 bits) would keep 0x80 and thus + the wrong sign. + + By using MA + MB + 1 we ensure that Q7 * Q7 is Q<1,14> + and not Q<0,14>. + + To convert Q<1,14> to Q<0,7> we need a toFract and a saturate. 
+ + */ + using ResType = typename Q< MA+MB+1 , FA+FB,SA || SB>::value_type; + ResType res = ((ResType)a.v * (ResType)b.v); + return(Q(res)); +} + + +/** + * @brief Add two fixed point numbers with saturation + * @tparam M Number of mantissa bits for the fixed point number + * @tparam F Number of fractional bits for the fixed point number + * @tparam S Is the fixed point number using a signed representation + * @param a First fixed point number + * @param b Second fixed point number + * @return return the sum with saturation (if supported by the datatype) + * + * + */ +template +inline Q operator+(const Q &a,const Q &b) +{ + Q ret(a); + ret+=b; + return ret; +} + +/** + * @brief Subtract two fixed point numbers with saturation + * @tparam M Number of mantissa bits for the fixed point number + * @tparam F Number of fractional bits for the fixed point number + * @tparam S Is the fixed point number using a signed representation + * @param a First fixed point number + * @param b Second fixed point number + * @return return the subtraction with saturation (if supported by the datatype) + * + * + */ +template +inline Q operator-(const Q &a,const Q &b) +{ + Q ret(a); + ret-=b; + return ret; +} + +/** + * @brief Negate a fixed point number with saturation + * @tparam M Number of mantissa bits for the fixed point number + * @tparam F Number of fractional bits for the fixed point number + * @tparam S Is the fixed point number using a signed representation + * @param a First fixed point number + * @return return negation with saturation (if supported by the datatype) + * + * + */ +template +inline Q operator-(const Q &a) +{ + Q ret; + ret-=a; + return ret; +} + +// Unsaturating add +/** + * @brief Add two fixed point numbers without saturation + * @tparam M Number of mantissa bits for the fixed point number + * @tparam F Number of fractional bits for the fixed point number + * @tparam S Is the fixed point number using a signed representation + * @param a First fixed point 
number + * @param b Second fixed point number + * @return return the sum without saturation + * + * + */ +template +inline Q add(const Q &a,const Q &b) +{ + return Q(a.v + b.v); +} + +// Unsaturating sub +/** + * @brief Subtract two fixed point numbers without saturation + * @tparam M Number of mantissa bits for the fixed point number + * @tparam F Number of fractional bits for the fixed point number + * @tparam S Is the fixed point number using a signed representation + * @param a First fixed point number + * @param b Second fixed point number + * @return return the subtraction without saturation + * + * + */ +template +inline Q sub(const Q &a,const Q &b) +{ + return Q(a.v - b.v); +} + + +template +constexpr std::integral_constant i_{}; + +/** + * @brief Shift right a fixed point number with a shift known at build time + * @tparam M Number of mantissa bits for the fixed point number + * @tparam F Number of fractional bits for the fixed point number + * @tparam S Is the fixed point number using a signed representation + * @param a First fixed point number + * @return return the shifted fixed point number + * + * + */ +template +inline Q operator >>(const Q &a, std::integral_constant) noexcept { + return Q(a.v >> N); +} + + +/** + * @brief Shift left a fixed point number with a shift known at build time + * @tparam M Number of mantissa bits for the fixed point number + * @tparam F Number of fractional bits for the fixed point number + * @tparam S Is the fixed point number using a signed representation + * @param a First fixed point number + * @return return the shifted fixed point number + * + * + */ +template +inline Q< M+N , F,S> operator <<(const Q &a, std::integral_constant) noexcept { + using ResType = typename Q::value_type; + return Q(ResType(a.v) << N); +} + + +/** + * @brief Saturate a signed fixed point number + * @tparam MD Number of mantissa bits for the destination fixed point number + * @tparam MS Number of mantissa bits for the source fixed point 
number + * @tparam S Is the fixed point number using a signed representation + * @param src First fixed point number + * @return return the saturated fixed point number + * + * Only applies if the number is signed, the representation requires less + * than 32 bits (since there is no saturating instruction for 64 bits) and + * if destination has less mantissa bits. + * + * If destination has more or equal number of mantissa bits then it does + * not make sense to saturate. + */ +template +inline Q saturate(const Q &src, + typename std::enable_if<(MD < MS) && ((MD+F)<31)>::type* = nullptr) +{ + return(Q(__SSAT(src.v, MD+F+1))); +} + + +/** + * @brief Saturate an unsigned fixed point number + * @tparam MD Number of mantissa bits for the destination fixed point number + * @tparam MS Number of mantissa bits for the source fixed point number + * @tparam S Is the fixed point number using a signed representation + * @param src The fixed point number + * @return return the saturated fixed point number + * + * Only applies if the number is unsigned, the representation requires less + * than 31 bits (since there is no saturating instruction for 64 bits) and + * if destination has less mantissa bits. + * + * If destination has more or equal number of mantissa bits then it does + * not make sense to saturate. 
+ */ +template +inline Q saturate(const Q &src,typename std::enable_if<(MD < MS) && ((MD+F)<31)>::type* = nullptr) +{ + return(Q(__USAT(src.v, MD+F+1))); +} + + +template +struct FixedCastShift {}; + +/* Positive shift */ + +/** + * @brief Changed fractional representation of a fixed point number using a shift + * @tparam M Number of mantissa bits for the fixed point number + * @tparam FD Number of fractional bits for the destination fixed point number + * @tparam FS Number of fractional bits for the source fixed point number + * @tparam S Is the fixed point number using a signed representation + * @param src The fixed point number + * @return return the fixed point number with different fractional part format + * + * Only applies if FD > FS + */ +template +struct FixedCastShiftFS)> { + constexpr static Q shift(const Q &src) + { + using DstType = typename Q::value_type; + return(Q(DstType(src.v) << (FD-FS))); + } +}; + +/** + * @brief Changed fractional representation of a fixed point number using a shift + * @tparam M Number of mantissa bits for the fixed point number + * @tparam FD Number of fractional bits for the destination fixed point number + * @tparam FS Number of fractional bits for the source fixed point number + * @tparam S Is the fixed point number using a signed representation + * @param src The fixed point number + * @return return the fixed point number with different fractional part format + * + * Only applies if FD < FS + */ +template +struct FixedCastShift { + constexpr static Q shift(const Q &src) + { + using DstType = typename Q::value_type; + using SrcType = typename Q::value_type; + + return(Q(DstType(SrcType(src.v) >> (FS-FD)))); + } +}; + +/** + * @brief Convert between different fractional part formats + * @tparam M Number of mantissa bits for the fixed point number + * @tparam FD Number of fractional bits for the destination fixed point number + * @tparam FS Number of fractional bits for the source fixed point number + * @tparam S Is the 
fixed point number using a signed representation + * @param src The fixed point number + * @return return the fixed point number with different fractional part format + * + */ +template +inline Q toFrac(const Q &src) +{ + return(FixedCastShift::shift(src)); +} + + +/** + * @brief Accumulation without saturation + * @tparam MD Number of mantissa bits for the destination fixed point number + * @tparam MS Number of mantissa bits for the source fixed point number + * @tparam F Number of fractional bits for fixed point number + * @tparam S Is the fixed point number using a signed representation + * + */ +template +struct Accumulate; + +/** + * @brief Accumulation without saturation + * @tparam MD Number of mantissa bits for the destination fixed point number + * @tparam MS Number of mantissa bits for the source fixed point number + * @tparam F Number of fractional bits for fixed point number + * @tparam S Is the fixed point number using a signed representation + * + */ +template +struct Accumulate { + /** + * @brief Accumulation without saturation + * + * @param[in] a first fixed point number + * @param[in] b second fixed point number + * + * @return The sum of both fixed point number with more + * matissa bits. 
+ */ + static Q acc (const Q &a,const Q &b) + { + using DstType = typename Q::value_type; + return(Q(DstType(a.v) + DstType(b.v))); + } +}; + +/** + * @brief Accumulate without saturation + * + * @param[in] a First fixed point number + * @param[in] b Second fixed point number + * + * @tparam MD Number of mantissa bits for destination + * @tparam MS Number of mantissa bits fro source + * @tparam F Number of fractional bits + * @tparam S Is the representation signed + * + * @return Sum of two numbers without saturation and using the + * destination number of mantissa bits + */ +template +inline Q accumulate(const Q &a,const Q &b) +{ + return(AccumulateMS)>::acc(a,b)); +} + + +template +inline Q _abs(const Q a) +{ + using DestType = typename Q::value_type; + return(Q(DestType(abs(a.v)))); +} + +/** + * @brief Multiplication operator. + * + * @param[in] a First value + * @param[in] b Second value + * + * @return The result of the multiplication with saturation + */ +inline Q7 operator*(const Q7 &a,const Q7 &b) +{ + return(saturate(toFrac<7>(mult(a,b)))); +} + +/** + * @brief Multiplication operator. + * + * @param[in] a First value + * @param[in] b Second value + * + * @return The result of the multiplication with saturation + */ +inline Q15 operator*(const Q15 &a,const Q15 &b) +{ + return (saturate(toFrac<15>(mult(a,b)))); +} + +/** + * @brief Multiplication operator. + * + * @param[in] a First value + * @param[in] b Second value + * + * @return The result of the multiplication with saturation + */ +inline Q31 operator*(const Q31 &a,const Q31 &b) +{ + return (toFrac<31>(saturate(toFrac<30>(mult(a,b))))); +} + +/** + * @brief Greater-than comparison operator. 
+ * + * @param[in] a First value + * @param[in] b Second value + * + * @tparam M Number of mantissa bits + * @tparam F Number of fractional bits + * + * @return The result of the greater-than comparison + */ +template +inline bool operator>(const Q &a,const Q &b) +{ + return(a.v>b.v); +} + +/** + * @brief Less-than comparison operator. + * + * @param[in] a First value + * @param[in] b Second value + * + * @tparam M Number of mantissa bits + * @tparam F Number of fractional bits + * + * @return The result of the less-than comparison + */ +template +inline bool operator<(const Q &a,const Q &b) +{ + return(a.v +inline bool operator>=(const Q &a,const Q &b) +{ + return(a.v>=b.v); +} + +/** + * @brief Less-than-or-equal comparison operator. + * + * @param[in] a First value + * @param[in] b Second value + * + * @tparam M Number of mantissa bits + * @tparam F Number of fractional bits + * + * @return The result of the less-than-or-equal comparison + */ +template +inline bool operator<=(const Q &a,const Q &b) +{ + return(a.v<=b.v); +} + +/** + * @brief Equality operator. + * + * @param[in] a First value + * @param[in] b Second value + * + * @tparam M Number of mantissa bits + * @tparam F Number of fractional bits + * + * @return The result of the equality + */ +template +inline bool operator==(const Q a,const Q b) +{ + return(a.v==b.v); +} + +/** + * @brief Inequality operator. + * + * @param[in] a First value + * @param[in] b Second value + * + * @tparam M Number of mantissa bits + * @tparam F Number of fractional bits + * + * @return The result of the inequality + */ +template +inline bool operator!=(const Q a,const Q b) +{ + return(a.v!=b.v); +} + +/** + * @brief Division operator. 
+ * + * @param[in] a First fixed point value + * @param[in] b Integer + * + * @tparam M Number of mantissa bits + * @tparam F Number of fractional bits + * @tparam S Is representation signed + * + * @return The result of the division + */ +template +inline Q operator/(const Q a,const int32_t b) +{ + return(Q(a.v / b)); +} + +/** + * @brief No op operator. + * + * @param[in] a Fixed point number + * + * @tparam M Number of mantissa bits + * @tparam F Number of fractional bits + * @tparam S Is the representation signed + * + * @return The result of the addition + */ +template +inline Q operator+(const Q &a) +{ + return(a); +} + +/*! @} */ + +} \ No newline at end of file diff --git a/dsppp/Include/dsppp/forward.hpp b/dsppp/Include/dsppp/forward.hpp new file mode 100644 index 00000000..012953dc --- /dev/null +++ b/dsppp/Include/dsppp/forward.hpp @@ -0,0 +1,149 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +namespace arm_cmsis_dsp { + +template +struct Vector_Base; + +template +struct VectorView; + +template typename Allocator> +struct Vector; + +template typename Allocator> +struct Matrix; + +template +struct MatrixView; + +template +struct NbRows; + +template +struct NbCols; + +template +struct Complexity; + +template +struct OutputVectorDim; + +template +struct CompatibleStaticMatMatProduct; + +template +struct CompatibleStaticMatVecProduct; + +template +struct CompatibleDynamicMatVecProduct; + +template +struct CompatibleDynamicMatMatProductStaticStride; + +template +struct CompatibleDynamicMatMatProductDynamicStride; + +template +struct CompatibleDynamicMatMatProduct; + +template +struct OutputVector; + +template +struct OutputMatrix; + + + +/* + +Identifications + +*/ + +/* + +Is a contiguous array in memory with scalar indexing +(operator[]) +It can be an _Expr +Vector has a length + +Generally used whe scalar indexing is required or length + +*/ +template +struct IsVector; + +/* + +Has matrix indexing (operator()) +and matrix operations like transpose, 
identity. +So it cannot be an _Expr because _Expr has no transpose, identity +Has rows, columns +Matrix may be vectors Vectors (with above definition) + +Generally used when transpose or identity are required. + +*/ +template +struct IsMatrix; + +/* + +Has matrix indexing (operator()) +but no matrix operator like transpose. +It can be an Expr +Has rows, columns +It may not always be a Vector (MatrixView are not contiguous) + +Generally used only when matrix indexing is mandatory + +*/ +template +struct HasMatrixIndexing; + +/* + + +Type Matrix : IsVector, IsMatrix, HasMatrixIndexing +Type MatrixView : , IsMatrix, HasMatrixIndexing +Type _Expr with Matrix : IsVector, , HasMatrixIndexing +Type _Expr with some MatrixView : HasMatrixIndexing + +*/ + + +/* + +Dimensions only known at runtime + +*/ +template +struct IsDynamic; + +/* + +StaticLength if known at build time otherwise 0 +*/ +template +struct StaticLength; + +/* + +Type of elements in vector or matrix + +*/ +template +struct ElementType; + +template +struct HasStaticStride; + +template +struct StaticStride; + +} diff --git a/dsppp/Include/dsppp/fusion.hpp b/dsppp/Include/dsppp/fusion.hpp new file mode 100644 index 00000000..96c8e4e7 --- /dev/null +++ b/dsppp/Include/dsppp/fusion.hpp @@ -0,0 +1,953 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +namespace arm_cmsis_dsp { + +/** \addtogroup FUSION Abstract syntax tree for fusion + * \ingroup DSPPP + * @{ + */ + +template struct traits +{ + typedef T Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits::vector Vector; +#endif +}; + +template +struct Complexity +{ + constexpr static int value = 0; +}; + +/* + +An unregular datatype has different strides like MatrixView +and can only be assigned to a MatrixView using a 2D functions. +So all normal eval functions will reject unregular structures. 
+ +*/ +template +struct HasMatrixIndexing +{ + constexpr static bool value = false; +}; + +template +struct HasStaticStride +{ + constexpr static bool value = false; +}; + + + +template +struct IsVector +{ + constexpr static bool value = false; +}; + +template +struct IsMatrix +{ + constexpr static bool value = false; +}; + + + +template +struct StaticLength +{ + constexpr static vector_length_t value = 0; +}; + +template +struct ElementType +{ + typedef T type; +}; + + +template +using SameElementType=std::is_same::type,typename ElementType::type>; + +/** + * @brief Determines if vector datatype supports vector instruction + * on a current architecture + * + * @tparam DA Datatype + * + * @return True if has vector instructions + */ +template +constexpr bool has_vector_inst() {return (vector_traits::type>::has_vector);} + +/** + * @brief Determines if datatype has predicated loop for current architecture + * + * @tparam DA Datatype + * + * @return True if has predicated loops + */ +template +constexpr bool has_predicate_inst() {return (vector_traits::type>::has_predicate);} + +/** + * @brief Determines if scalar datatype (not vector, vectorview, matrix, matrixview) + * + * @tparam DA { description } + * + * @return True if scalar, False otherwise. 
+ */ +template +constexpr bool is_scalar() {return (!IsVector::value && + !HasMatrixIndexing::value);} + +/** + * @brief Check if datatype can only be used as a matrix (no vector addressing) + * + * @tparam DA Datatype + * + * @return True if can only be used as a matrix (no vector indexing) + */ +template +constexpr bool must_use_matrix_idx() {return (!IsVector::value && + HasMatrixIndexing::value);} +/** + * @brief Check if both datatype have vector indexing are + * same scalar datatype + * + * @tparam DA First datatype + * @tparam DB Second datatype + * + * @return True if both datatype have vectro indexing and same scalar type + */ +template +constexpr bool vector_idx_pair() {return (IsVector::value && + IsVector::value && + SameElementType::value);} + +// By default scalar has no vector size so can't be used +// to infer a size at build time. They are considered as dynamic +// Otherwise, by default vectors are considered static +// except is there is a specialization of this template +// (and that is the case for dynamic vectors) +template +struct IsDynamic +{ + constexpr static bool value = is_scalar(); +}; + +/* + +Vector only not including matrixes (which are also vectors) + +*/ + +/** + * @brief Check if has vector indexing + * + * @tparam DA Datatype + * + * @return True if dtatype supports vector indexing + */ +template +constexpr bool is_only_vector() {return (IsVector::value && + !HasMatrixIndexing::value);} + +/** + * @brief Check if datatypes have same scalar datatype and no vector indexing + * + * @tparam DA First datatype + * @tparam DB Second datatype + * + * @return True if datatypes have same scalar datatype and no vector indexing + */ +template +constexpr bool must_use_matrix_idx_pair() {return ((must_use_matrix_idx() || must_use_matrix_idx()) && + SameElementType::value);} + + +/* + +Static length is 0 for scalar and Dynamic vectors +*/ + +/** + * @brief Static length + * + * @tparam DA First datatype + * @tparam DB Second datatype + * + * 
@return Return the static length of the first datatype having + * a static length in the pair. + */ +template +constexpr vector_length_t static_length() { + return ((StaticLength::value==0) ? StaticLength::value : StaticLength::value); +} + +/* + +False only when DA and DB are static vector and with differet size +Anyother case is ok. + +*/ + +/** + * @brief Check compatibility of length + * + * @tparam DA First datatype + * @tparam DB Second datatype + * + * @return False only when DA and DA have different static lengths. + * All other cases are True. + */ +template +constexpr bool same_static_length() +{ + return((StaticLength::value == 0) || /* Scalar or dynamic case */ + (StaticLength::value == 0) || /* Scalar or dynamic case */ + (StaticLength::value == StaticLength::value)); +} +/* + +Vector operators at instruction level + +*/ +#include "fusion_ops.hpp" + + +/** + * @brief Expression template + * + * @tparam T Datatype representing the expression + * + */ +template +struct _Expr { + + using Scalar = typename traits::Scalar; +#if defined(HAS_VECTOR) + using Vector = typename traits::Vector; +#endif + + /** + * @brief Derived datatype + * + * @return Return the derived datatype + */ + T& derived() {return(static_cast(*this));} + + /** + * @brief Derived datatype + * + * @return Return the derived datatype + */ + T const& derived() const {return(static_cast(*this));} + + /** + * @brief Vector indexing in the expression + * + * @param[in] i Index + * + * @return The result of the vector indexer + */ + Scalar const operator[](const index_t i) const {return(this->derived()[i]);} + + /** + * @brief Matrix indexing + * + * @param[in] r Row index + * @param[in] c Column index + * + * @return Element at index + */ + Scalar const operator()(const index_t r,const index_t c) const {return(this->derived()(r,c));} + +#if defined(HAS_VECTOR) + /** + * @brief Vector operation at given index + * + * @param[in] i Vector index + * + * @return Evaluation of vector at the index + 
*/ + Vector const vector_op(const index_t i) const {return(this->derived().vector_op(i));} + + /** + * @brief Vector operation at index with loop predicate + * + * @param[in] i Vector index + * @param[in] remaining Remaining elements in the loop + * + * @return Evaluation of vector at index with tail predication + */ + Vector const vector_op_tail(const index_t i,const vector_length_t remaining) const {return(this->derived().vector_op_tail(i,remaining));} + + /** + * @brief Matrix operation at index + * + * @param[in] r row index + * @param[in] c column index + * + * @return Evaluation of matrix expression at index + */ + Vector const matrix_op(const index_t r,const index_t c) const {return(this->derived().matrix_op(r,c));} + + /** + * @brief Matrix operation at index with tail predication + * + * @param[in] r row index + * @param[in] c column index + * @param[in] remaining Remaining elements in the loop + * + * @return Evaluation of matrix operation at index + */ + Vector const matrix_op_tail(const index_t r,const index_t c,const vector_length_t remaining) const {return(this->derived().matrix_op_tail(r,c,remaining));} +#endif + + /** + * @brief Length of result + * + * @return The vector length. 
+ */ + vector_length_t length() const {return(this->derived().length());} + + /** + * @brief Number of rows for result + * + * @return Number of rows + */ + vector_length_t rows() const {return(this->derived().rows());} + + /** + * @brief Number of columns for result + * + * @return Number of columns + */ + vector_length_t columns() const {return(this->derived().columns());} + + virtual ~_Expr(){}; + +protected: + _Expr() = default; + _Expr(const _Expr&) = default; + _Expr(_Expr&&) = default; + _Expr& operator=(const _Expr& other) = delete; + _Expr& operator=(_Expr&& other) = delete; +}; + +/***************** + * + * BINARY AST + */ + +/** + * @brief Expression for binary operator + * + * @tparam LHS Left hand side datatype + * @tparam RHS Right hand side datatype + * @tparam DerivedOp Operator for the binary operation + * + */ +template +struct _Binary: _Expr<_Binary> +{ + using Scalar = typename traits::Scalar; +#if defined(HAS_VECTOR) + using Vector = typename traits::Vector; +#endif + _Binary(const LHS &lhs, + const RHS &rhs, + const _BinaryOperator &op): + lhs_(lhs),rhs_(rhs),op_(op){ + } + + + _Binary(const _Binary &other): + lhs_(other.lhs_),rhs_(other.rhs_),op_(other.op_){ + } + + _Binary& operator=(const _Binary& other) = delete; + _Binary& operator=(_Binary&& other) = delete; + + _Binary(_Binary &&other): + lhs_(std::move(other.lhs_)),rhs_(std::move(other.rhs_)),op_(std::move(other.op_)) + { + } + + template::value,bool>::type = true> + vector_length_t length() const { + return(lhs_.length()); + } + + template::value && IsVector::value,bool>::type = true> + vector_length_t length() const { + return(rhs_.length()); + } + + template::value,bool>::type = true> + vector_length_t rows() const { + return(lhs_.rows()); + } + + template::value && HasMatrixIndexing::value,bool>::type = true> + vector_length_t rows() const { + return(rhs_.rows()); + } + + template::value,bool>::type = true> + vector_length_t columns() const { + return(lhs_.columns()); + } + + 
template::value && HasMatrixIndexing::value,bool>::type = true> + vector_length_t columns() const { + return(rhs_.columns()); + } + + + + template::value && + IsVector::value,bool>::type = true> + Scalar const operator[](const index_t i) const { + return(op_(lhs_[i],rhs_[i])); + } + + template::value && + is_scalar(),bool>::type = true> + Scalar const operator[](const index_t i) const { + return(op_(lhs_[i],rhs_)); + } + + template() && + IsVector::value,bool>::type = true> + Scalar const operator[](const index_t i) const { + return(op_(lhs_,rhs_[i])); + } + + template::value && + HasMatrixIndexing::value,bool>::type = true> + Scalar const operator()(const index_t r,const index_t c) const + { + return(op_(lhs_(r,c),rhs_(r,c))); + } + + template() && + HasMatrixIndexing::value,bool>::type = true> + Scalar const operator()(const index_t r,const index_t c) const + { + return(op_(lhs_,rhs_(r,c))); + } + + template::value && + is_scalar(),bool>::type = true> + Scalar const operator()(const index_t r,const index_t c) const + { + return(op_(lhs_(r,c),rhs_)); + } + +#if defined(HAS_VECTOR) + /* V + V */ + template::value && + IsVector::value,bool>::type = true> + Vector const vector_op(const index_t i) const + { + return(op_(lhs_.vector_op(i),rhs_.vector_op(i))); + } + + template() && + IsVector::value && + IsVector::value,bool>::type = true> + Vector const vector_op_tail(const index_t i,const vector_length_t remaining) const + { + return(op_(lhs_.vector_op_tail(i,remaining),rhs_.vector_op_tail(i,remaining),inner::vctpq::mk(remaining))); + } + + /* V + S */ + template::value && + is_scalar(),bool>::type = true> + Vector const vector_op(const index_t i) const + { + return(op_(lhs_.vector_op(i),rhs_)); + } + + template() && + IsVector::value && + is_scalar(),bool>::type = true> + Vector const vector_op_tail(const index_t i,const vector_length_t remaining) const + { + return(op_(lhs_.vector_op_tail(i,remaining),rhs_,inner::vctpq::mk(remaining))); + } + + + + /* S + V */ + 
template() && + IsVector::value,bool>::type = true> + Vector const vector_op(const index_t i) const + { + return(op_(lhs_,rhs_.vector_op(i))); + } + + template() && + is_scalar() && + IsVector::value,bool>::type = true> + Vector const vector_op_tail(const index_t i,const vector_length_t remaining) const + { + return(op_(lhs_,rhs_.vector_op_tail(i,remaining),inner::vctpq::mk(remaining))); + } + + + /************* + * + * For matrix + * + */ + + /* V + V */ + template::value && + HasMatrixIndexing::value,bool>::type = true> + Vector const matrix_op(const index_t r,const index_t c) const + { + return(op_(lhs_.matrix_op(r,c),rhs_.matrix_op(r,c))); + } + + template() && + HasMatrixIndexing::value && + HasMatrixIndexing::value,bool>::type = true> + Vector const matrix_op_tail(const index_t r,const index_t c,const vector_length_t remaining) const + { + return(op_(lhs_.matrix_op_tail(r,c,remaining),rhs_.matrix_op_tail(r,c,remaining),inner::vctpq::mk(remaining))); + } + + /* V + S */ + template::value && + is_scalar(),bool>::type = true> + Vector const matrix_op(const index_t r,const index_t c) const + { + return(op_(lhs_.matrix_op(r,c),rhs_)); + } + + template() && + HasMatrixIndexing::value && + is_scalar(),bool>::type = true> + Vector const matrix_op_tail(const index_t r,const index_t c,const vector_length_t remaining) const + { + return(op_(lhs_.matrix_op_tail(r,c,remaining),rhs_,inner::vctpq::mk(remaining))); + } + + + + /* S + V */ + template() && + HasMatrixIndexing::value,bool>::type = true> + Vector const matrix_op(const index_t r,const index_t c) const + { + return(op_(lhs_,rhs_.matrix_op(r,c))); + } + + template() && + is_scalar() && + HasMatrixIndexing::value,bool>::type = true> + Vector const matrix_op_tail(const index_t r,const index_t c,const vector_length_t remaining) const + { + return(op_(lhs_,rhs_.matrix_op_tail(r,c,remaining),inner::vctpq::mk(remaining))); + } + + +#endif + const LHS lhs_; + const RHS rhs_; + const _BinaryOperator op_; +}; + +template 
+struct Complexity<_Expr> +{ + constexpr static int value = Complexity::value; +}; + +template +struct ElementType<_Expr> +{ + typedef typename ElementType::type type; +}; + +template +struct Complexity<_Binary> +{ + constexpr static int lhsv = Complexity::value; + constexpr static int rhsv = Complexity::value; + constexpr static int value = lhsv + rhsv + 1; +}; + +template +struct ElementType<_Binary> +{ + typedef typename ElementType::type type; +}; + + +template +struct IsVector<_Expr> +{ + constexpr static bool value = IsVector::value; +}; + +template +struct HasMatrixIndexing<_Expr> +{ + constexpr static bool value = HasMatrixIndexing::value; +}; + +template +struct IsVector<_Binary> +{ + constexpr static bool value = + (IsVector::value && IsVector::value) || + (IsVector::value && is_scalar()) || + (is_scalar() && IsVector::value); +}; + +template +struct HasMatrixIndexing<_Binary> +{ + constexpr static bool value = + (HasMatrixIndexing::value && HasMatrixIndexing::value) || + (HasMatrixIndexing::value && is_scalar()) || + (is_scalar() && HasMatrixIndexing::value); +}; + +template +struct IsDynamic<_Expr> +{ + constexpr static bool value = IsDynamic::value; +}; + +template +struct IsDynamic<_Binary> +{ + constexpr static bool value = IsDynamic::value && IsDynamic::value; +}; + +template +struct StaticLength<_Expr> +{ + constexpr static vector_length_t value = StaticLength::value; +}; + +template +struct StaticLength<_Binary> +{ + constexpr static vector_length_t value = static_length(); + +}; + +template +struct traits<_Expr> +{ + typedef typename traits::Scalar Scalar; +#if defined(HAS_VECTOR) + typedef typename traits::Vector Vector; +#endif +}; + +template +struct traits<_Binary> +{ + typedef typename traits::Scalar Scalar; +#if defined(HAS_VECTOR) + typedef typename traits::Vector Vector; +#endif +}; + + +/***************** + * + * UNARY AST + */ + +/** + * @brief Expression for unary operator + * + * @tparam LHS Left hand side datatype + * @tparam 
DerivedOp Operator for the binary operation + * + */ +template +struct _Unary: _Expr<_Unary> +{ + using Scalar = typename traits::Scalar; +#if defined(HAS_VECTOR) + using Vector = typename traits::Vector; +#endif + _Unary(const LHS &lhs, + const _UnaryOperator &op): + lhs_(lhs),op_(op){ + } + + _Unary(const _Unary &other): + lhs_(other.lhs_),op_(other.op_){ + } + + _Unary(LHS &&other): + lhs_(std::move(other.lhs_)),op_(std::move(other.op_)){ + } + + _Unary& operator=(const _Unary& other) = delete; + _Unary& operator=(_Unary&& other) = delete; + + + vector_length_t length() const { + return(lhs_.length()); + } + + template::value,bool>::type = true> + vector_length_t rows() const { + return(lhs_.rows()); + } + + template::value,bool>::type = true> + vector_length_t columns() const { + return(lhs_.columns()); + } + + template::value ,bool>::type = true> + Scalar const operator[](const index_t i) const { + return(op_(lhs_[i])); + } + + template::value ,bool>::type = true> + Scalar const operator()(const index_t r,const index_t c) const + { + return(op_(lhs_(r,c))); + } + + +#if defined(HAS_VECTOR) + /* V */ + template::value ,bool>::type = true> + Vector const vector_op(const index_t i) const + { + return(op_(lhs_.vector_op(i))); + } + + template() && + IsVector::value ,bool>::type = true> + Vector const vector_op_tail(const index_t i,const vector_length_t remaining) const + { + return(op_(lhs_.vector_op_tail(i,remaining),inner::vctpq::mk(remaining))); + } + + /* + + For Matrix + + */ + + /* V */ + template::value ,bool>::type = true> + Vector const matrix_op(const index_t r,const index_t c) const + { + return(op_(lhs_.matrix_op(r,c))); + } + + template() && + HasMatrixIndexing::value ,bool>::type = true> + Vector const matrix_op_tail(const index_t r,const index_t c,const vector_length_t remaining) const + { + return(op_(lhs_.matrix_op_tail(r,c,remaining),inner::vctpq::mk(remaining))); + } + + +#endif + const LHS lhs_; + const _UnaryOperator op_; +}; + +template 
+struct Complexity<_Unary> +{ + constexpr static int value = 1 + Complexity::value; +}; + +template +struct ElementType<_Unary> +{ + typedef typename ElementType::type type; +}; + +template +struct IsVector<_Unary> +{ + constexpr static bool value = IsVector::value; +}; + +template +struct HasMatrixIndexing<_Unary> +{ + constexpr static bool value = HasMatrixIndexing::value; +}; + +template +struct IsDynamic<_Unary> +{ + constexpr static bool value = IsDynamic::value; +}; + +template +struct StaticLength<_Unary> +{ + constexpr static vector_length_t value = StaticLength::value; +}; + + +template +struct traits<_Unary> +{ + typedef typename traits::Scalar Scalar; +#if defined(HAS_VECTOR) + typedef typename traits::Vector Vector; +#endif +}; + + + + +/* + +Dot product + +*/ + +template +using DotResult = typename number_traits::Scalar>::accumulator; + + + +/** + * @brief Dot product + * + * @tparam VA Left hand side vector datatype + * @tparam VB Right hand side vector datatype + * @param a left hand side vector + * @param b right hand side vector + * @return The dot product + * + * The vector can be vector, vector views or expressions. + * + */ +template() && + is_only_vector() && + is_only_vector() && + (!IsDynamic::value || !IsDynamic::value),bool>::type = true> +inline DotResult dot(const VA& a, + const VB& b) +{ + constexpr vector_length_t l = static_length(); + return(_dot(a,b,l,CURRENT_ARCH)); +} + + +template() && + is_only_vector() && + is_only_vector() && + (IsDynamic::value && IsDynamic::value),bool>::type = true> +inline DotResult dot(const VA& a, + const VB& b) +{ + const vector_length_t l = a.length(); + + return(_dot(a,b,l,CURRENT_ARCH)); +} + + + + +/** + * @brief Swap vectors + * + * @tparam VA Left hand side vector datatype + * @tparam VB Right hand side vector datatype + * @param a left hand side vector + * @param b right hand side vector + * + * The vector can be vector, vector views or expressions. + * + * The content of vector is swapped. 
+ * + */ +template() && + (!IsDynamic::value || !IsDynamic::value),bool>::type = true> +inline void swap(VA&& a, + VB&& b) +{ + constexpr vector_length_t l = static_length(); + + _swap(std::forward(a),std::forward(b),l,CURRENT_ARCH); +} + + +template() && + (IsDynamic::value && IsDynamic::value),bool>::type = true> +inline void swap(VA&& a, + VB&& b) +{ + const vector_length_t l = a.length(); + + _swap(std::forward(a),std::forward(b),l,CURRENT_ARCH); +} + +/*! @} */ + +} + diff --git a/dsppp/Include/dsppp/fusion_ops.hpp b/dsppp/Include/dsppp/fusion_ops.hpp new file mode 100644 index 00000000..b79410df --- /dev/null +++ b/dsppp/Include/dsppp/fusion_ops.hpp @@ -0,0 +1,405 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup FUSION + * @{ + */ + +/** + * @brief Unary operator + * + * @tparam Scalar Datatype for scalar + * @tparam Derived Datatype representing the operator expression + * + */ +template +struct _UnaryOperator{ + Derived& derived() {return(static_cast(*this));} + + Derived const& derived() const {return(static_cast(*this));} + + Scalar const operator()(const Scalar lhs) const + { + return(this->derived()(lhs)); + } + + #if defined(HAS_VECTOR) + using Vector= typename vector_traits::vector ; + using pred_t = typename vector_traits::predicate_t; + + Vector const operator()(const Vector lhs) const + { + return(this->derived()(lhs)); + } + + /* + + Predicated operation when exists (Helium) + + */ + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs,const pred_t p0) const + { + return(this->derived()(lhs,p0)); + } + + /* + Vector const to_vector(const Scalar lhs) const + { + return(this->derived().to_vector(lhs)); + } + */ +#endif +}; + +/** + * @brief Unary operator + * + * @tparam Scalar Datatype for scalar + * @tparam Derived Datatype representing the operator expression + * + */ +template +struct _BinaryOperator{ + Derived& derived() {return(static_cast(*this));} + + Derived const& derived() const 
{return(static_cast(*this));} + + Scalar const operator()(const Scalar lhs, + const Scalar rhs) const + { + return(this->derived()(lhs,rhs)); + } + + #if defined(HAS_VECTOR) + using Vector= typename vector_traits::vector ; + using pred_t = typename vector_traits::predicate_t; + + + Vector const operator()(const Vector lhs, + const Vector rhs) const + { + return(this->derived()(lhs,rhs)); + } + + Vector const operator()(const Vector lhs, + const Scalar rhs) const + { + return(this->derived()(lhs,rhs)); + } + + Vector const operator()(const Scalar lhs, + const Vector rhs) const + { + return(this->derived()(lhs,rhs)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const Vector rhs, + const pred_t p0) const + { + return(this->derived()(lhs,rhs,p0)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const Scalar rhs, + const pred_t p0) const + { + return(this->derived()(lhs,rhs,p0)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Scalar lhs, + const Vector rhs, + const pred_t p0) const + { + return(this->derived()(lhs,rhs,p0)); + } +#endif +}; + +/* + * + * BINARY + * + */ + +/** + * @brief Add operator + * + * @tparam Scalar Datatype for scalar + * + */ +template +struct _AddOp:_BinaryOperator> +{ + Scalar const operator()(const Scalar lhs, + const Scalar rhs) const { + return(lhs + rhs); + } + +#if defined(HAS_VECTOR) + using Vector=typename vector_traits::vector ; + using pred_t = typename vector_traits::predicate_t; + + Vector const operator()(const Vector lhs, + const Vector rhs) const + { + return(inner::vadd(lhs,rhs)); + } + + Vector const operator()(const Vector lhs, + const Scalar rhs) const + { + return(inner::vadd(lhs,rhs)); + } + + Vector const operator()(const Scalar lhs, + const Vector rhs) const + { + return(inner::vadd(lhs,rhs)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector 
lhs, + const Vector rhs, + const pred_t p0) const + { + return(inner::vadd(lhs,rhs,p0)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const Scalar rhs, + const pred_t p0) const + { + return(inner::vadd(lhs,rhs,p0)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Scalar lhs, + const Vector rhs, + const pred_t p0) const + { + return(inner::vadd(lhs,rhs,p0)); + } +#endif +}; + +/** + * @brief Sub operator + * + * @tparam Scalar Datatype for scalar + * + */ +template +struct _SubOp:_BinaryOperator> +{ + Scalar const operator()(const Scalar lhs, + const Scalar rhs) const { + return(lhs - rhs); + } + +#if defined(HAS_VECTOR) + using Vector=typename vector_traits::vector ; + using pred_t = typename vector_traits::predicate_t; + + Vector const operator()(const Vector lhs, + const Vector rhs) const + { + return(inner::vsub(lhs,rhs)); + } + + Vector const operator()(const Vector lhs, + const Scalar rhs) const + { + return(inner::vsub(lhs,rhs)); + } + + Vector const operator()(const Scalar lhs, + const Vector rhs) const + { + return(inner::vsub(lhs,rhs)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const Vector rhs, + const pred_t p0) const + { + return(inner::vsub(lhs,rhs,p0)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const Scalar rhs, + const pred_t p0) const + { + return(inner::vsub(lhs,rhs,p0)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Scalar lhs, + const Vector rhs, + const pred_t p0) const + { + return(inner::vsub(lhs,rhs,p0)); + } +#endif +}; + + +/** + * @brief Mul operator + * + * @tparam Scalar Datatype for scalar + * + */ +template +struct _MulOp:_BinaryOperator> +{ + Scalar const operator()(const Scalar lhs, + const Scalar rhs) const { + return(lhs * rhs); + } + +#if defined(HAS_VECTOR) + using Vector= typename 
vector_traits::vector ; + using pred_t = typename vector_traits::predicate_t; + + Vector const operator()(const Vector lhs, + const Vector rhs) const + { + return(inner::vmul(lhs,rhs)); + } + + Vector const operator()(const Vector lhs, + const Scalar rhs) const + { + return(inner::vmul(lhs,rhs)); + } + + Vector const operator()(const Scalar lhs, + const Vector rhs) const + { + return(inner::vmul(lhs,rhs)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const Vector rhs, + const pred_t p0) const + { + return(inner::vmul(lhs,rhs,p0)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const Scalar rhs, + const pred_t p0) const + { + return(inner::vmul(lhs,rhs,p0)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Scalar lhs, + const Vector rhs, + const pred_t p0) const + { + return(inner::vmul(lhs,rhs,p0)); + } +#endif +}; + +/* + * + * UNARY + * + */ + +/** + * @brief Neg operator + * + * @tparam Scalar Datatype for scalar + * + */ +template +struct _NegOp:_UnaryOperator> +{ + Scalar const operator()(const Scalar lhs) const { + return(-lhs); + } + +#if defined(HAS_VECTOR) + using Vector= typename vector_traits::vector ; + using pred_t = typename vector_traits::predicate_t; + + Vector const operator()(const Vector lhs) const + { + return(inner::vneg(lhs)); + } + + template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const pred_t p0) const + { + return(inner::vneg(lhs,p0)); + } + + +#endif +}; + + +/** + * @brief No operator + * + * @tparam Scalar Datatype for scalar + * + */ +template +struct _NoOp:_UnaryOperator> +{ + Scalar const operator()(const Scalar lhs) const { + return(lhs); + } + +#if defined(HAS_VECTOR) + using Vector= typename vector_traits::vector ; + using pred_t = typename vector_traits::predicate_t; + + Vector const operator()(const Vector lhs) const + { + return(lhs); + } + + 
template::has_predicate,bool>::type = true> + Vector const operator()(const Vector lhs, + const pred_t p0) const + { + (void)p0; + return(lhs); + } + +#endif +}; + +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/matrix.hpp b/dsppp/Include/dsppp/matrix.hpp new file mode 100644 index 00000000..db73002c --- /dev/null +++ b/dsppp/Include/dsppp/matrix.hpp @@ -0,0 +1,736 @@ +// -*- C++ -*- +/** @file */ +#pragma once + + +#include +#include +#include +#include +#include "common.hpp" +#include "arch.hpp" +#include +#include "number.hpp" +#include "forward.hpp" +#include "fusion.hpp" +#include "unroll.hpp" +#include "algorithms.hpp" +#include "vec.hpp" +#include "matrix_impl.hpp" +#include "matrix_view.hpp" + +namespace arm_cmsis_dsp { + +/** \addtogroup Matrix Matrixes + * \ingroup DSPPP + * @{ + */ + +template typename A> +struct traits> +{ + typedef P Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits

::vector Vector; +#endif +}; + +template +struct traits> +{ + typedef P Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits

::vector Vector; +#endif +}; + +template typename A> +struct traits&> +{ + typedef P Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits

::vector Vector; +#endif +}; + +template +struct traits&> +{ + typedef P Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits

::vector Vector; +#endif +}; + + +template typename Allocator> +struct IsVector> +{ + constexpr static bool value = true; +}; + + + +template typename Allocator> +struct HasStaticStride> +{ + constexpr static bool value = (C>0); +}; + +template typename Allocator> +struct StaticStride> +{ + constexpr static index_t value = C; +}; + +template typename Allocator> +struct IsMatrix> +{ + constexpr static bool value = true; +}; + +template typename Allocator> +struct HasMatrixIndexing> +{ + constexpr static bool value = true; +}; + +template +struct IsMatrix> +{ + constexpr static bool value = true; +}; + +template +struct HasStaticStride> +{ + constexpr static bool value = (S>0); +}; + +template +struct StaticStride> +{ + constexpr static index_t value = S; +}; + +template +struct HasMatrixIndexing> +{ + constexpr static bool value = true; +}; + +template typename Allocator> +struct IsVector&> +{ + constexpr static bool value = true; +}; + + +template +struct IsVector> +{ + constexpr static bool value = true; +}; + +template +struct IsVector&> +{ + constexpr static bool value = true; +}; + +template typename Allocator> +struct HasStaticStride&> +{ + constexpr static bool value = (C>0); +}; + +template typename Allocator> +struct StaticStride&> +{ + constexpr static index_t value = C; +}; + +template typename Allocator> +struct HasMatrixIndexing&> +{ + constexpr static bool value = true; +}; + + +template +struct IsMatrix&> +{ + constexpr static bool value = true; +}; + +template +struct HasMatrixIndexing&> +{ + constexpr static bool value = true; +}; + +template +struct HasStaticStride&> +{ + constexpr static bool value = (S>0); +}; + +template +struct StaticStride&> +{ + constexpr static index_t value = S; +}; + +template typename Allocator> +struct ElementType> +{ + typedef P type; +}; + + +template +struct ElementType> +{ + typedef P type; +}; + +template typename Allocator> +struct ElementType&> +{ + typedef P type; +}; + +template +struct ElementType&> +{ + 
typedef P type; +}; + +template typename Allocator> +struct StaticLength> +{ + constexpr static vector_length_t value = (R*C<0) ? 0 : R*C; +}; + +template +struct StaticLength> +{ + constexpr static vector_length_t value = 0; +}; + +template typename Allocator> +struct StaticLength&> +{ + constexpr static vector_length_t value = (R*C<0) ? 0 : R*C; +}; + +template +struct StaticLength&> +{ + constexpr static vector_length_t value = 0 ; +}; + +template typename Allocator> +struct IsDynamic> +{ + constexpr static bool value = (R<0) || (C<0); +}; + +template +struct IsDynamic> +{ + constexpr static bool value = true; +}; + +template typename Allocator> +struct IsDynamic&> +{ + constexpr static bool value = (R<0) || (C<0); +}; + +template +struct IsDynamic&> +{ + constexpr static bool value = true; +}; + +/* + + +Compatibility of vector and matrix dimensions at build time + +*/ + +template +struct NbRows +{ + constexpr static vector_length_t value = DYNAMIC; +}; + +template typename Allocator> +struct NbRows> +{ + constexpr static vector_length_t value = R; +}; + +template typename Allocator> +struct NbRows&> +{ + constexpr static vector_length_t value = R; +}; + +template +struct NbCols +{ + constexpr static vector_length_t value = DYNAMIC; +}; + +template typename Allocator> +struct NbCols> +{ + constexpr static vector_length_t value = C; +}; + +template typename Allocator> +struct NbCols&> +{ + constexpr static vector_length_t value = C; +}; + + +template +struct CompatibleStaticMatVecProduct +{ + constexpr static bool value = + is_only_vector() && + HasMatrixIndexing::value && + (NbCols::value == StaticLength::value) && + !IsDynamic::value + && SameElementType::value; + +}; + +/* MB IsMatrix because we need transpose operator */ +template +struct CompatibleStaticMatMatProduct +{ + constexpr static bool value = + HasMatrixIndexing::value && + IsMatrix::value && + (NbCols::value == NbRows::value) && + !IsDynamic::value && + SameElementType::value; + +}; + +template 
+struct CompatibleDynamicMatVecProduct +{ + constexpr static bool value = + HasMatrixIndexing::value && + IsDynamic::value && + is_only_vector() && + SameElementType::value; + +}; + +/* MB IsMatrix because we need transpose operator */ +template +struct CompatibleDynamicMatMatProductStaticStride +{ + constexpr static bool value = + HasMatrixIndexing::value && + IsMatrix::value && + IsDynamic::value && + HasStaticStride::value && + SameElementType::value; +}; + +template +struct CompatibleDynamicMatMatProductDynamicStride +{ + constexpr static bool value = + HasMatrixIndexing::value && + IsMatrix::value && + IsDynamic::value && + !HasStaticStride::value && + SameElementType::value; +}; + +template +struct CompatibleDynamicMatMatProduct +{ + constexpr static bool value = + HasMatrixIndexing::value && + IsMatrix::value && + IsDynamic::value && + SameElementType::value; +}; + +template +struct OutputVector { + typedef Vector::Scalar, + OutputVectorDim::value,TMP_ALLOC> type; +}; + +template +struct OutputMatrix { + constexpr static bool dynamic = (NbRows::value < 0) || (NbCols::value < 0); + constexpr static vector_length_t nbrows = dynamic ? DYNAMIC : NbRows::value; + constexpr static vector_length_t nbcols = dynamic ? 
DYNAMIC : NbCols::value; + + typedef Matrix::Scalar,nbrows,nbcols,TMP_ALLOC> type; +}; + + + +template +struct OutputVectorDim +{ + constexpr static vector_length_t value = DYNAMIC; +}; + +template typename Allocator> +struct OutputVectorDim> +{ + constexpr static vector_length_t value = R; +}; + +template typename Allocator> +struct OutputVectorDim&> +{ + constexpr static vector_length_t value = R; +}; + + +template +struct VecRef> +{ + typedef MatrixView type; + static type ref(const MatrixView&a){ + return(a); + }; +}; + +template typename A> +struct VecRef,((R>0) && (C>0))> +{ + typedef const Matrix& type; + static type ref(const Matrix&a,typename std::enable_if<(R>0) && (C>0)>::type* = nullptr){ + return(a); + }; +}; + +template typename A> +struct VecRef,((R<0) || (C<0))> +{ + typedef MatrixView type; + static type ref(const Matrix&a,typename std::enable_if<(R<0) || (C<0)>::type* = nullptr){ + return(type(a,a.rows(),a.columns())); + }; +}; + + +/***************** + * + * + * Fused matrix operators + * + ****************/ + +/** + * @brief Outer product operator for expressions + * + * @tparam LHS Left hand side datatype + * @tparam RHS Right hand side datatype + * @tparam DerivedOp Operator for the Outer operation + * + * vector `op` vector (including matrix) + */ +template +struct _Outer: _Expr<_Outer> +{ + //! Type of vector elements + using Scalar = typename traits::Scalar; +#if defined(HAS_VECTOR) + //! 
Type of vector in the architecture + using Vector = typename traits::Vector; +#endif + /** + * @brief Create an Outer operator + * + * @param lhs Left hand side expression + * @param rhs Right hand side expression + * @param op operator + */ + _Outer(const LHS &lhs, + const RHS &rhs, + const _BinaryOperator &op): + lhs_(lhs),rhs_(rhs),op_(op){ + } + + /** + * @brief Create an Outer operator from another operator of same type + * + * @param other the other operator + */ + _Outer(const _Outer &other): + lhs_(other.lhs_),rhs_(other.rhs_),op_(other.op_){ + } + + _Outer& operator=(const _Outer& other) = delete; + _Outer& operator=(_Outer&& other) = delete; + + /** + * @brief Move semantic for _Outer operator + * + * @param other the other operator + */ + _Outer(_Outer &&other): + lhs_(std::move(other.lhs_)),rhs_(std::move(other.rhs_)),op_(std::move(other.op_)) + { + } + + + + /** + * @brief Length of the matrix (seen as vector) resulting from the outer operator + * @tparam R Right hand side datatype + * @tparam L Left hand side datatype + * + * @return vector dimension + */ + template::value && IsVector::value,bool>::type = true> + vector_length_t length() const { + return(lhs_.length() * rhs_.length()); + } + + /** + * @brief Rows of the matrix + * @tparam R Right hand side datatype + * @tparam L Left hand side datatype + * + * @return number of rows + */ + template::value,bool>::type = true> + vector_length_t rows() const { + return(lhs_.length()); + } + + + /** + * @brief Columns of the matrix + * @tparam R Right hand side datatype + * @tparam L Left hand side datatype + * + * @return number of columns + */ + template::value,bool>::type = true> + vector_length_t columns() const { + return(rhs_.length()); + } + + + /** + * @brief Expression value at given position + * @tparam R Right hand side datatype + * @tparam L Left hand side datatype + * @param r row index + * @param c column index + * + * @return expression value + */ + template::value && + 
IsVector::value,bool>::type = true> + Scalar const operator()(const index_t r,const index_t c) const + { + return(op_(lhs_[r],rhs_[c])); + } + + +#if defined(HAS_VECTOR) + /* + * + * For matrix + * + */ + + /* V + V */ + + /** + * @brief Expression vector value at given position + * @tparam R Right hand side datatype + * @tparam L Left hand side datatype + * @param r row index + * @param c column index + * + * @return expression vector value + * + * Vector + Vector (matrix interpreted as a Vector) + */ + template::value && + IsVector::value,bool>::type = true> + Vector const matrix_op(const index_t r,const index_t c) const + { + return(op_(lhs_[r],rhs_.vector_op(c))); + } + + /** + * @brief Expression vector value at given position with tail predication + * @tparam R Right hand side datatype + * @tparam L Left hand side datatype + * @param r row index + * @param c column index + * @param remaining remaining number of samples in loop + * + * @return expression vector value + * + * Vector + Vector (matrix interpreted as a Vector) + */ + template::value && + IsVector::value,bool>::type = true> + Vector const matrix_op_tail(const index_t r,const index_t c,const vector_length_t remaining) const + { + return(op_(lhs_[r],rhs_.vector_op_tail(c,remaining),inner::vctpq::mk(remaining))); + } + + +#endif + const LHS lhs_; + const RHS rhs_; + const _BinaryOperator op_; +}; + +template +struct IsVector<_Outer> +{ + constexpr static bool value = false; +}; + +template +struct HasMatrixIndexing<_Outer> +{ + constexpr static bool value = true; +}; + +template +struct StaticLength<_Outer> +{ + constexpr static vector_length_t value = StaticLength::value * StaticLength::value; +}; + +template +struct IsDynamic<_Outer> +{ + constexpr static vector_length_t value = IsDynamic::value || IsDynamic::value; +}; + +template +struct Complexity<_Outer> +{ + constexpr static int lhsv = Complexity::value; + constexpr static int rhsv = Complexity::value; + constexpr static int value = lhsv + rhsv 
+ 1; +}; + +template +struct ElementType<_Outer> +{ + typedef typename ElementType::type type; +}; + +template +struct traits<_Outer> +{ + typedef typename traits::Scalar Scalar; +#if defined(HAS_VECTOR) + typedef typename traits::Vector Vector; +#endif +}; + +template +struct VecRef<_Outer> +{ + typedef _Outer type; + static type ref(const _Outer&a){ + return(a); + }; +}; + +template +struct NbRows<_Outer> +{ + constexpr static vector_length_t value = NbRows::value; +}; + + +template +struct NbCols<_Outer> +{ + constexpr static vector_length_t value = NbCols::value; +}; + + +/** +* @brief Outer product +* @tparam VA Right hand side datatype +* @tparam VB Left hand side datatype +* @param a Vector a +* @param b Vector b +* +* @return Outer product of a and b +* +*/ +template(),bool>::type = true> +inline auto outer(const VA&a,const VB&b) +{ + //constexpr int NBROWS = StaticLength::value; + //constexpr int NBCOLS = StaticLength::value; + + //using T = typename traits::Scalar; + + //Matrix res; + //_outer(res,a,b); + using Scalar = typename traits::Scalar; + using VecLHS = VecRef; + using VecRHS = VecRef; + + return(_Outer>(VecLHS::ref(a),VecRHS::ref(b),_MulOp())); + + +} + +/*! 
@} */ +} diff --git a/dsppp/Include/dsppp/matrix_impl.hpp b/dsppp/Include/dsppp/matrix_impl.hpp new file mode 100644 index 00000000..cc3f6da6 --- /dev/null +++ b/dsppp/Include/dsppp/matrix_impl.hpp @@ -0,0 +1,1161 @@ +// -*- C++ -*- +/** @file */ +#pragma once + + +#include +#include +#include +#include +#include "common.hpp" +#include "arch.hpp" +#include +#include "number.hpp" +#include "forward.hpp" +#include "fusion.hpp" +#include "unroll.hpp" +#include "algorithms.hpp" +#include "vec.hpp" + +namespace arm_cmsis_dsp { + +/** \addtogroup Matrix + * @{ + */ + +/******************** + * + * MATRIX + * + ********************/ + +/** +* @brief Slice +*/ +struct Slice +{ + /** + * @brief Create new slice object + * @param s start index + * @param e stop index + * + */ + Slice(const index_t s,const index_t e):start(s),stop(e){}; + + //! Start index + const index_t start; + + //! Stop index + const index_t stop; +}; + +/** @brief Matrix + * @tparam P Type of the scalar + * @tparam R Number of rows + * @tparam C Number of columns + * @tparam Allocator Memory allocator + */ +template typename Allocator = TMP_ALLOC> +struct Matrix:Vector +{ + /** @brief Number of rows + * @return Number of rows + */ + constexpr vector_length_t rows() const {return(R);} + + /** @brief Number of columns + * @return Number of columns + */ + constexpr vector_length_t columns() const {return(C);} + + /** @brief Number of stride + * @return Number of stride + */ + constexpr uint32_t stride() const {return(C);} + + + /** @brief Create matrix + */ + Matrix():Vector(){}; + + /** @brief Create matrix + * @param init_val Initialization value + */ + explicit Matrix(P init_val):Vector(init_val){}; + + Matrix(const Matrix& other) = default; + Matrix(Matrix&& other) = default; + + /** @brief Create matrix from another matrix using different memory allocator + * @tparam OtherAllocator other memory allocator + * @param other Other matrix + */ + template typename OtherAllocator> + explicit Matrix(const 
Matrix& other):Vector() + { + eval(*this,+other,(vector_length_t)(R*C),CURRENT_ARCH); + }; + + /* Applies only when the AST does not contain any dynamic MatrixView */ + + /** @brief Create matrix from expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other Other matrix + * + * Only applies when the expression does not contain any MatrixView since + * matrix view may have a stride and cannot be used as vectors. + */ + template::value,bool>::type = true> + Matrix(const _Expr& other):Vector(other) + { + }; + + /* Applies only when AST is containing any dynamic MatrixView */ + + /** @brief Create matrix from expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other Other matrix + * + * Applies when contain a matrix view that has a stride and thus force a 2D + * evaluation loop. + */ + template(),bool>::type = true> + Matrix(const _Expr& other):Vector() + { + eval2D(*this,other.derived(),rows(),columns(),CURRENT_ARCH); + }; + + /** @brief Assign matrix from expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other Other matrix + * @return the matrix + * + * Applies when expression does not contain matrix view + */ + template::value,bool>::type = true> + Matrix& operator=(const _Expr& other) + { + eval(*this,other.derived(),(vector_length_t)R*C,CURRENT_ARCH); + return(*this); + } + + /* Applies only when AST is containing any dynamic MatrixView */ + /** @brief Assign matrix from expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other Other matrix + * @return the matrix + * + * Applies when contain a matrix view that has a stride and thus force a 2D + * evaluation loop. 
+ */ + template(),bool>::type = true> + Matrix& operator=(const _Expr& other) + { + eval2D(*this,other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + } + + /** @brief Create a matrix view + * @param rs start row + * @param cs start column + * @return matrix view + * + */ + MatrixView sub(const index_t rs,const index_t cs) + { + const vector_length_t nb_rows = rows() - rs; + const vector_length_t nb_cols = columns() - cs; + + return(MatrixView(Vector_Base

::ptr(rs*stride()+cs),nb_rows,nb_cols)); + } + + /** @brief Create a constant matrix view + * @param rs start row + * @param cs start column + * @return matrix view + * + */ + const MatrixView sub(const index_t rs,const index_t cs) const + { + const vector_length_t nb_rows = rows() - rs; + const vector_length_t nb_cols = columns() - cs; + + return(MatrixView(Vector_Base

::ptr(rs*stride()+cs),nb_rows,nb_cols)); + } + + /** @brief Create a matrix view + * @param rs Row slice (start and end row) + * @param cs start column + * @return matrix view + * + */ + MatrixView sub(const Slice &rs,const index_t cs) + { + const vector_length_t nb_rows = rs.stop - rs.start; + const vector_length_t nb_cols = columns() - cs; + + return(MatrixView(Vector_Base

::ptr(rs.start*stride()+cs),nb_rows,nb_cols)); + } + + /** @brief Create a constant matrix view + * @param rs Row slice (start and end row) + * @param cs start column + * @return matrix view + * + */ + const MatrixView sub(const Slice &rs,const index_t cs) const + { + const vector_length_t nb_rows = rs.stop - rs.start; + const vector_length_t nb_cols = columns() - cs; + + return(MatrixView(Vector_Base

::ptr(rs.start*stride()+cs),nb_rows,nb_cols)); + } + + + /** @brief Create a matrix view + * @param rs Row start index + * @param cs Column slice + * @return matrix view + * + */ + MatrixView sub(const index_t rs,const Slice &cs) + { + const vector_length_t nb_rows = rows() - rs; + const vector_length_t nb_cols = cs.stop - cs.start; + + return(MatrixView(Vector_Base

::ptr(rs*stride()+cs.start),nb_rows,nb_cols)); + } + + /** @brief Create a constant matrix view + * @param rs Row start index + * @param cs Column slice + * @return matrix view + * + */ + const MatrixView sub(const index_t rs,const Slice &cs) const + { + const vector_length_t nb_rows = rows() - rs; + const vector_length_t nb_cols = cs.stop - cs.start; + + return(MatrixView(Vector_Base

::ptr(rs*stride()+cs.start),nb_rows,nb_cols)); + } + + /** @brief Create a matrix view + * @param rs Row slice + * @param cs Column slice + * @return matrix view + * + */ + MatrixView sub(const Slice& rs,const Slice& cs) + { + const vector_length_t nb_rows = rs.stop - rs.start; + const vector_length_t nb_cols = cs.stop - cs.start; + + return(MatrixView(Vector_Base

::ptr(rs.start*stride()+cs.start),nb_rows,nb_cols)); + } + + /** @brief Create a constant matrix view + * @param rs Row slice + * @param cs Column slice + * @return matrix view + * + */ + const MatrixView sub(const Slice& rs,const Slice& cs) const + { + const vector_length_t nb_rows = rs.stop - rs.start; + const vector_length_t nb_cols = cs.stop - cs.start; + + return(MatrixView(Vector_Base

::ptr(rs.start*stride()+cs.start),nb_rows,nb_cols)); + } + + /** @brief Create a matrix view + * @param rs Row start + * @param re Row end + * @param cs Column start + * @param ce Column end + * @return matrix view + * + */ + MatrixView sub(const index_t rs, + const index_t re, + const index_t cs, + const index_t ce) + { + const vector_length_t nb_rows = re - rs; + const vector_length_t nb_cols = ce - cs; + + return(MatrixView(Vector_Base

::ptr(rs*stride()+cs),nb_rows,nb_cols)); + } + + /** @brief Create a constant matrix view + * @param rs Row start + * @param re Row end + * @param cs Column start + * @param ce Column end + * @return matrix view + * + */ + const MatrixView sub(const index_t rs, + const index_t re, + const index_t cs, + const index_t ce) const + { + const vector_length_t nb_rows = re - rs; + const vector_length_t nb_cols = ce - cs; + + return(MatrixView(Vector_Base

::ptr(rs*stride()+cs),nb_rows,nb_cols)); + } + + + Matrix& operator=(const Matrix& other) = default; + + Matrix& operator=(Matrix&& other) = default; + + /** @brief Access matrix element at given position + * @param r Row index + * @param c Column index + * @return reference to element + * + */ + P& operator()(const index_t r,const index_t c) + { + return(Vector_Base

::ptr()[r*C+c]); + } + + /** @brief Access matrix element at given position + * @param r Row index + * @param c Column index + * @return reference to element + * + */ + P& operator()(const index_t r,const index_t c) const + { + return(Vector_Base

::ptr()[r*C+c]); + } + + + /** + * @brief Display the matrix content for debug purpose + * @param stream Output stream + * @param other The matrix to display + * @return the stream + * + */ + friend std::ostream& operator<< (std::ostream& stream, const Matrix& other) { + int c=0; + for(index_t k=0;k + VectorView row(const index_t i,const index_t start=0,const index_t stop=C) + { + return(VectorView(*this,i*stride()+start,i*stride()+stop)); + } + + /** @brief Create a constant row view vector with stride + * @tparam S stride + * @param i row index + * @param start Start index in row + * @param stop Stop index in row + * Default is number of columns + * @return row view vector + * + */ + template + const VectorView row(const index_t i,const index_t start=0,const index_t stop=C) const + { + return(VectorView(*this,i*stride()+start,i*stride()+stop)); + } + + /** @brief Create a column view vector with stride + * @tparam S stride + * @param i column index + * @param start Start index in row + * @param stop Stop index in row + * Default is number of rows + * @return column view vector + * + */ + template + VectorView col(const index_t i,const index_t start=0,const index_t stop=R) + { + return(VectorView(*this,i+stride()*start,i+stride()*stop)); + } + + /** @brief Create a constant column view vector with stride + * @tparam S stride + * @param i column index + * @param start Start index in row + * @param stop Stop index in row + * Default is number of rows + * @return column view vector + * + */ + template + const VectorView col(const index_t i,const index_t start=0,const index_t stop=R) const + { + return(VectorView(*this,i+stride()*start,i+stride()*stop)); + } + + /** @brief Create a diagonal matrix + * @tparam RA Number of rows + * @tparam CA Number of columns + * @tparam VA Vector datatype + * @param a Vector for initializing the diagonal + * @return a matrix + * + * Only exists when RA == CA and the size is known at built time + * + */ + template::value && + (RA == 
CA) && (RA>0) && + SameElementType::value,bool>::type = true> + static Matrix diagonal(const VA& a) + { + Matrix res; + _diagonal(res,a,RA); + return(res); + } + + /** @brief Fill diagonal of a matrix with a vector + * @tparam RA Number of rows + * @tparam CA Number of columns + * @tparam VA Vector datatype + * @param a Vector for initializing the diagonal + * + * Only exists when RA == CA and the size is known at built time + * + */ + template::value && + (RA == CA) && (RA>0) && + SameElementType::value,bool>::type = true> + void fill_diagonal(const VA& a) + { + _fill_diagonal(*this,a,RA); + } + + /** @brief Create an identity matrix + * @tparam RA Number of rows + * @tparam CA Number of columns + * @return a matrix + * + * Only exists when RA == CA and the size is known at built time + * + */ + template0),bool>::type = true> + static Matrix identity() + { + Matrix res; + _identity(res,RA); + return(res); + } + + /** @brief Create a matrix of same type + * @return a matrix + * + */ + Matrix create() const + { + Matrix res; + return(res); + } + + /** @brief Create the transposed matrix + * @return a matrix + * + */ + Matrix transpose() const + { + Matrix res; + transposeTo(res,*this); + return(res); + } + +#if defined(HAS_VECTOR) + //! Type of vectors for a vector architecture and for scalar datatype P + using VectorType = typename vector_traits

::vector; + + /** + * @brief %Vector store at a given row,column position + * + * @param row row index + * @param col column index + * @param val %Vector value + * + * On an architecture supporting vectors, if the scalar datatype T + * has a corresponding vector datatype, this function stores a vector + * value at row,column in this matrix. + */ + void matrix_store(const index_t row, + const index_t col, + const VectorType val) const + { + Vector_Base

::vector_store(row*C + col,val); + } + +#if defined(HAS_PREDICATED_LOOP) + /** + * @brief %Vector store at a given row,column position with predicated tail + * + * @param row row index + * @param col column index + * @param remaining Number of remaining samples in the loop + * @param val Vector value to write at index i with tail predication + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function stores a vector value at row,column index in this matrix datatype + * with predication + */ + void matrix_store_tail(const index_t row, + const index_t col, + const vector_length_t remaining, + const VectorType val) const + { + Vector_Base

::vector_store_tail(row*C + col,remaining,val); + } + + /** + * @brief %Vector operation at a given row,column position with predicated tail + * + * @param row row index + * @param col column index + * @param remaining Number of remaining samples in the loop + * @return the vector result of the operation + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function compute an operation at row,column index in this matrix datatype + * with predication + */ + VectorType const matrix_op_tail(const index_t row, + const index_t col, + const vector_length_t remaining) const + { + return(Vector_Base

::vector_op_tail(row*C + col,remaining)); + } +#endif + + /** + * @brief %Vector operation at a given row,column position + * + * @param row row index + * @param col column index + * @return the vector result of the operation + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function compute an operation at row,column index in this matrix datatype + */ + VectorType const matrix_op(const index_t row, + const index_t col) const + { + return(Vector_Base

::vector_op(row*C + col)); + } +#endif + +}; + +/** @brief Matrix + * @tparam P Type of the scalar + * @tparam Allocator Memory allocator + */ +template typename Allocator> +struct Matrix:Vector +{ + /** @brief Number of rows + * @return Number of rows + */ + vector_length_t rows() const {return(rows_);} + + /** @brief Number of columns + * @return Number of columns + */ + vector_length_t columns() const {return(columns_);} + + /** @brief Number of stride + * @return Number of stride + */ + uint32_t stride() const {return(columns_);} + + /** @brief Create matrix + * @param r number of rows + * @param c number of columns + */ + explicit Matrix(vector_length_t r,vector_length_t c): + Vector(r*c),rows_(r),columns_(c){}; + + /** @brief Create matrix + * @param r number of rows + * @param c number of columns + * @param init_val Initialization value + */ + explicit Matrix(vector_length_t r,vector_length_t c,P init_val): + Vector(r*c,init_val),rows_(r),columns_(c){}; + + Matrix(const Matrix& other) = default; + Matrix(Matrix&& other) = default; + + /** @brief Access matrix element at given position + * @param r Row index + * @param c Column index + * @return reference to element + * + */ + P& operator()(const index_t r,const index_t c) + { + return(Vector_Base

::ptr()[r*columns()+c]); + } + + /** @brief Access matrix element at given position + * @param r Row index + * @param c Column index + * @return reference to element + * + */ + P& operator()(const index_t r,const index_t c) const + { + return(Vector_Base

::ptr()[r*columns()+c]); + } + + /** @brief Create matrix from another matrix using different memory allocator + * @tparam RK Number of rows + * @tparam CK Number of columns + * @tparam OtherAllocator other memory allocator + * @param other Other matrix + */ + template typename OtherAllocator> + explicit Matrix(const Matrix& other): + Vector(other.rows()*other.columns()), + rows_(other.rows()),columns_(other.columns()) + { + if ((other.rows() == rows()) && (other.columns() == columns())) + { + eval(*this,+other,(vector_length_t)(other.rows()*other.columns()),CURRENT_ARCH); + } + }; + + /** @brief Create matrix from expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other Other matrix + * + * Only applies when the expression does not contain any MatrixView since + * matrix view may have a stride and cannot be used as vectors. + */ + template::value,bool>::type = true> + Matrix(const _Expr& other):Vector(other), + rows_(other.rows()),columns_(other.columns()) + { + }; + + /** @brief Create matrix from expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other Other matrix + * + * Applies when contain a matrix view that has a stride and thus force a 2D + * evaluation loop. 
+ */ + template(),bool>::type = true> + Matrix(const _Expr& other): + Vector(other.rows()*other.columns()), + rows_(other.rows()),columns_(other.columns()) + { + eval2D(*this,other.derived(),rows(),columns(),CURRENT_ARCH); + }; + + /** @brief Assign matrix from expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other Other matrix + * @return the matrix + * + * Applies when expression does not contain matrix view + */ + template::value,bool>::type = true> + Matrix& operator=(const _Expr& other) + { + eval(*this,other.derived(),rows()*columns(),CURRENT_ARCH); + return(*this); + }; + + + /** @brief Assign matrix from expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other Other matrix + * @return the matrix + * + * Applies when contain a matrix view that has a stride and thus force a 2D + * evaluation loop. + */ + template(),bool>::type = true> + Matrix& operator=(const _Expr& other) + { + eval2D(*this,other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + Matrix& operator=(const Matrix& other) = default; + + Matrix& operator=(Matrix&& other) = default; + + /** + * @brief Display the matrix content for debug purpose + * @param stream Output stream + * @param other The matrix to display + * @return the stream + * + */ + friend std::ostream& operator<< (std::ostream& stream, const Matrix& other) { + int c=0; + for(index_t k=0;k::value && + SameElementType::value,bool>::type = true> + static Matrix diagonal(const VA& a) + { + Matrix res(a.length(),a.length()); + _diagonal(res,a,a.length()); + return(res); + } + + /** @brief Fill diagonal of a matrix with a vector + * @tparam VA Vector datatype + * @param a Vector for initializing the diagonal + * + */ + template::value && + SameElementType::value,bool>::type = true> + void fill_diagonal(const VA& a) + { + _fill_diagonal(*this,a,this->length()); + } + + /** @brief Create an identity matrix + 
* @param l Matrix dimension (l x l) + * @return a matrix + * + */ + static Matrix identity(const vector_length_t l) + { + Matrix res(l,l); + _identity(res,l); + return(res); + } + + /** @brief Create a matrix of same type + * @return a matrix + * + */ + Matrix create() const + { + Matrix res(rows(),columns()); + return(res); + } + + /** @brief Create the transposed matrix + * @return a matrix + * + */ + Matrix transpose() const + { + Matrix res(columns(),rows()); + transposeTo(res,*this); + return(res); + } + + /** @brief Create a row view with stride 1 + * @param i row index + * @param start Start index in row + * @return row view vector + * + */ + VectorView row(const index_t i,const index_t start=0) + { + return(VectorView(*this,i*this->stride()+start,i*this->stride()+this->columns())); + } + + /** @brief Create a row view with stride 1 + * @param i row index + * @param start Start index in row + * @param stop Stop index in row + * @return row view vector + * + */ + VectorView row(const index_t i,const index_t start,const index_t stop) + { + return(VectorView(*this,i*this->stride()+start,i*this->stride()+stop)); + } + + /** @brief Create a constant row view with stride 1 + * @param i row index + * @param start Start index in row + * @return row view vector + * + */ + const VectorView row(const index_t i,const index_t start=0) const + { + return(VectorView(*this,i*this->stride()+start,i*this->stride()+this->columns())); + } + + /** @brief Create a constant row view with stride 1 + * @param i row index + * @param start Start index in row + * @param stop Stop index in row + * @return row view vector + * + */ + const VectorView row(const index_t i,const index_t start,const index_t stop) const + { + return(VectorView(*this,i*this->stride()+start,i*this->stride()+stop)); + } + + /** @brief Create a column view vector + * @tparam CS column stride + * @param i column index + * @param start Start index in column + * @return column view vector + * + */ + template + 
VectorView col(const index_t i,const index_t start=0) + { + return(VectorView(*this,i+this->stride()*start,i+this->stride()*this->rows(),this->stride()*CS)); + } + + /** @brief Create a column view vector + * @tparam CS column stride + * @param i column index + * @param start Start index in column + * @param stop Stop index in column + * @return column view vector + * + */ + template + VectorView col(const index_t i,const index_t start,const index_t stop) + { + return(VectorView(*this,i+this->stride()*start,i+this->stride()*stop,this->stride()*CS)); + } + + /** @brief Create a constant column view vector + * @tparam CS column stride + * @param i column index + * @param start Start index in column + * @return column view vector + * + */ + template + const VectorView col(const index_t i,const index_t start=0) const + { + return(VectorView(*this,i+this->stride()*start,i+this->stride()*this->rows(),this->stride()*CS)); + } + + /** @brief Create a constant column view vector + * @tparam CS column stride + * @param i column index + * @param start Start index in column + * @param stop Stop index in column + * @return column view vector + * + */ + template + const VectorView col(const index_t i,const index_t start,const index_t stop) const + { + return(VectorView(*this,i+this->stride()*start,i+this->stride()*stop,this->stride()*CS)); + } + +#if defined(HAS_VECTOR) + //! Type of vectors for a vector architecture and for scalar datatype P + using VectorType = typename vector_traits

::vector; + + /** + * @brief %Vector store at a given row,column position + * + * @param row row index + * @param col column index + * @param val %Vector value + * + * On an architecture supporting vectors, if the scalar datatype T + * has a corresponding vector datatype, this function stores a vector + * value at row,column in this matrix. + */ + void matrix_store(const index_t row, + const index_t col, + const VectorType val) const + { + Vector_Base

::vector_store(row*stride() + col,val); + } + +#if defined(HAS_PREDICATED_LOOP) + + /** + * @brief %Vector store at a given row,column position with predicated tail + * + * @param row row index + * @param col column index + * @param remaining Number of remaining samples in the loop + * @param val Vector value to write at index i with tail predication + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function stores a vector value at row,column index in this matrix datatype + * with predication + */ + void matrix_store_tail(const index_t row, + const index_t col, + const vector_length_t remaining, + const VectorType val) const + { + Vector_Base

::vector_store_tail(row*stride() + col,remaining,val); + } + + /** + * @brief %Vector operation at a given row,column position with predicated tail + * + * @param row row index + * @param col column index + * @param remaining Number of remaining samples in the loop + * @return the vector result of the operation + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function compute an operation at row,column index in this matrix datatype + * with predication + */ + VectorType const matrix_op_tail(const index_t row, + const index_t col, + const vector_length_t remaining) const + { + return(Vector_Base

::vector_op_tail(row*stride() + col,remaining)); + } +#endif + + /** + * @brief %Vector operation at a given row,column position + * + * @param row row index + * @param col column index + * @return the vector result of the operation + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function compute an operation at row,column index in this matrix datatype + */ + VectorType const matrix_op(const index_t row, + const index_t col) const + { + return(Vector_Base

::vector_op(row*stride() + col)); + } +#endif + + /** @brief Create a matrix view + * @param rs start row + * @param cs start column + * @return matrix view + * + */ + MatrixView sub(const index_t rs,const index_t cs) + { + const vector_length_t nb_rows = rows() - rs; + const vector_length_t nb_cols = columns() - cs; + + return(MatrixView(Vector_Base

::ptr(rs*stride()+cs),nb_rows,nb_cols,stride())); + } + + /** @brief Create a constant matrix view + * @param rs start row + * @param cs start column + * @return matrix view + * + */ + const MatrixView sub(const index_t rs,const index_t cs) const + { + const vector_length_t nb_rows = rows() - rs; + const vector_length_t nb_cols = columns() - cs; + + return(MatrixView(Vector_Base

::ptr(rs*stride()+cs),nb_rows,nb_cols,stride())); + } + + /** @brief Create a matrix view + * @param rs Row slice (start and end row) + * @param cs start column + * @return matrix view + * + */ + MatrixView sub(const Slice &rs,const index_t cs) + { + const vector_length_t nb_rows = rs.stop - rs.start; + const vector_length_t nb_cols = columns() - cs; + + return(MatrixView(Vector_Base

::ptr(rs.start*stride()+cs),nb_rows,nb_cols,stride())); + } + + /** @brief Create a constant matrix view + * @param rs Row slice (start and end row) + * @param cs start column + * @return matrix view + * + */ + const MatrixView sub(const Slice &rs,const index_t cs) const + { + const vector_length_t nb_rows = rs.stop - rs.start; + const vector_length_t nb_cols = columns() - cs; + + return(MatrixView(Vector_Base

::ptr(rs.start*stride()+cs),nb_rows,nb_cols,stride())); + } + + /** @brief Create a matrix view + * @param rs Row start index + * @param cs Column slice + * @return matrix view + * + */ + MatrixView sub(const index_t rs,const Slice &cs) + { + const vector_length_t nb_rows = rows() - rs; + const vector_length_t nb_cols = cs.stop - cs.start; + + return(MatrixView(Vector_Base

::ptr(rs*stride()+cs.start),nb_rows,nb_cols,stride())); + } + + + /** @brief Create a constant matrix view + * @param rs Row start index + * @param cs Column slice + * @return matrix view + * + */ + const MatrixView sub(const index_t rs,const Slice &cs) const + { + const vector_length_t nb_rows = rows() - rs; + const vector_length_t nb_cols = cs.stop - cs.start; + + return(MatrixView(Vector_Base

::ptr(rs*stride()+cs.start),nb_rows,nb_cols,stride())); + } + + /** @brief Create a matrix view + * @param rs Row slice + * @param cs Column slice + * @return matrix view + * + */ + MatrixView sub(const Slice& rs,const Slice& cs) + { + const vector_length_t nb_rows = rs.stop - rs.start; + const vector_length_t nb_cols = cs.stop - cs.start; + + return(MatrixView(Vector_Base

::ptr(rs.start*stride()+cs.start),nb_rows,nb_cols,stride())); + } + + /** @brief Create a constant matrix view + * @param rs Row slice + * @param cs Column slice + * @return matrix view + * + */ + const MatrixView sub(const Slice& rs,const Slice& cs) const + { + const vector_length_t nb_rows = rs.stop - rs.start; + const vector_length_t nb_cols = cs.stop - cs.start; + + return(MatrixView(Vector_Base

::ptr(rs.start*stride()+cs.start),nb_rows,nb_cols,stride())); + } + + /** @brief Create a matrix view + * @param rs Row start + * @param re Row end + * @param cs Column start + * @param ce Column end + * @return matrix view + * + */ + MatrixView sub(const index_t rs, + const index_t re, + const index_t cs, + const index_t ce) + { + const vector_length_t nb_rows = re - rs; + const vector_length_t nb_cols = ce - cs; + + return(MatrixView(Vector_Base

::ptr(rs*stride()+cs),nb_rows,nb_cols,stride())); + } + + /** @brief Create a constant matrix view + * @param rs Row start + * @param re Row end + * @param cs Column start + * @param ce Column end + * @return matrix view + * + */ + const MatrixView sub(const index_t rs, + const index_t re, + const index_t cs, + const index_t ce) const + { + const vector_length_t nb_rows = re - rs; + const vector_length_t nb_cols = ce - cs; + + return(MatrixView(Vector_Base

::ptr(rs*stride()+cs),nb_rows,nb_cols,stride())); + } + +protected: + vector_length_t rows_,columns_; +}; + + +/*! @} */ +} \ No newline at end of file diff --git a/dsppp/Include/dsppp/matrix_view.hpp b/dsppp/Include/dsppp/matrix_view.hpp new file mode 100644 index 00000000..67504d08 --- /dev/null +++ b/dsppp/Include/dsppp/matrix_view.hpp @@ -0,0 +1,1453 @@ +// -*- C++ -*- +/** @file */ +#pragma once + + +#include +#include +#include +#include +#include "common.hpp" +#include "arch.hpp" +#include +#include "number.hpp" +#include "forward.hpp" +#include "fusion.hpp" +#include "unroll.hpp" +#include "algorithms.hpp" +#include "vec.hpp" +#include "matrix_impl.hpp" + +namespace arm_cmsis_dsp { + +/** \addtogroup Matrix + * @{ + */ + +/** @brief Matrix + * @tparam T Type of the scalar + * @tparam S Stride + */ +template +struct MatrixView +{ + /** @brief Number of rows + * @return Number of rows + */ + vector_length_t rows() const {return(nb_rows_);} + + /** @brief Number of columns + * @return Number of columns + */ + vector_length_t columns() const {return(nb_cols_);} + + /** @brief Number of stride + * @return Number of stride + */ + constexpr uint32_t stride() const {return(S);} + + /** @brief Create matrix view on a buffer (buffer not owned by the view) + * @param v buffer + * @param rows number of rows + * @param cols number of columns + */ + explicit MatrixView(T* v, + const vector_length_t rows, + const vector_length_t cols): + v_(v),nb_rows_(rows),nb_cols_(cols){}; + + /** @brief Create matrix view on vector (vector not owned by the view) + * @param v vector + * @param rows number of rows + * @param cols number of columns + */ + explicit MatrixView(const Vector_Base &v, + const vector_length_t rows, + const vector_length_t cols): + v_(v.ptr()),nb_rows_(rows),nb_cols_(cols){}; + + virtual ~MatrixView() {}; + + MatrixView(const MatrixView& other): + v_(other.v_), + nb_rows_(other.nb_rows_),nb_cols_(other.nb_cols_){}; + + MatrixView(MatrixView&& other) : + 
v_(other.v_), + nb_rows_(other.nb_rows_),nb_cols_(other.nb_cols_){}; + + + MatrixView& operator=(const MatrixView& other) = delete; + MatrixView& operator=(MatrixView&& other) = delete; + + /** @brief Access matrix view element at given position + * @param r Row index + * @param c Column index + * @return reference to element + * + */ + T& operator()(const index_t r,const index_t c) + { + return(v_[r*stride()+c]); + } + + /** @brief Access matrix view element at given position + * @param r Row index + * @param c Column index + * @return reference to element + * + */ + T const operator()(const index_t r,const index_t c) const + { + return(v_[r*stride()+c]); + } + + + /** @brief Assign matrix from expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other Expression + * @return the matrix + * + */ + template + MatrixView& operator=(const _Expr&other) + { + eval2D(*this,other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + } + + /** @brief Assign matrix view from constant + * @param val The constant + * @return the matrix + * + */ + MatrixView& operator=(const T val) + { + _Fill2D(*this,val,rows(),columns(),CURRENT_ARCH); + + return(*this); + } + + + /** @brief Add matrix from expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other Expression + * @return the matrix + * + */ + template + MatrixView& operator +=(const _Expr& other) + { + eval2D(*this,*this + other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Add matrix from matrix view + * @param other Other matrix + * @return the matrix + * + */ + MatrixView& operator +=(const MatrixView& other) + { + eval2D(*this,*this + other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + + /** @brief Add constant to matrix view + * @param other The constant + * @return the matrix + * + */ + MatrixView& operator +=(const T other) + { + eval2D(*this,*this + 
other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Subtract matrix from expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other expression + * @return the matrix + * + */ + template + MatrixView& operator -=(const _Expr& other) + { + eval2D(*this,*this - other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Subtract matrix view + * @param other Other matrix view + * @return the matrix + * + */ + MatrixView& operator -=(const MatrixView& other) + { + eval2D(*this,*this - other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Subtract constant + * @param other Other matrix + * @return the matrix + * + */ + MatrixView& operator -=(const T other) + { + eval2D(*this,*this - other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Elementwise multiply matrix view with expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other expression + * @return the matrix + * + */ + template + MatrixView& operator *=(const _Expr& other) + { + eval2D(*this,*this * other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Elementwise multiply matrix view with matrix view + * @param other Other matrix + * @return the matrix + * + */ + MatrixView& operator *=(const MatrixView& other) + { + eval2D(*this,*this * other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Elementwise multiply matrix view constant + * @param other constant + * @return the matrix + * + */ + MatrixView& operator *=(const T other) + { + eval2D(*this,*this * other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Display the matrix content for debug purpose + * @param stream Output stream + * @param other The matrix to display + * @return the stream + * + */ + friend std::ostream& operator<< (std::ostream& stream, const MatrixView& other) { + 
for(index_t row=0;row row(const index_t i,const index_t start=0) + { + return(VectorView(v_,i*stride()+start,i*stride()+columns())); + } + + /** @brief Create a row view with stride 1 + * @param i row index + * @param start Start index in row + * @param stop Stop index in row + * @return row view vector + * + */ + VectorView row(const index_t i,const index_t start,const index_t stop) + { + return(VectorView(v_,i*stride()+start,i*stride()+stop)); + } + + /** @brief Create a constant row view with stride 1 + * @param i row index + * @param start Start index in row + * @return row view vector + * + */ + const VectorView row(const index_t i,const index_t start=0) const + { + return(VectorView(v_,i*stride()+start,i*stride()+columns())); + } + + /** @brief Create a constant row view with stride 1 + * @param i row index + * @param start Start index in row + * @param stop Stop index in row + * @return row view vector + * + */ + const VectorView row(const index_t i,const index_t start,const index_t stop) const + { + return(VectorView(v_,i*stride()+start,i*stride()+stop)); + } + + /** @brief Create a column view vector + * @tparam CS column stride + * @param i column index + * @param start Start index in column + * @return column view vector + * + */ + template + VectorView col(const index_t i,const index_t start=0) + { + return(VectorView(v_,i+stride()*start,i+stride()*rows())); + } + + /** @brief Create a column view vector + * @tparam CS column stride + * @param i column index + * @param start Start index in column + * @param stop Stop index in column + * @return column view vector + * + */ + template + VectorView col(const index_t i,const index_t start,const index_t stop) + { + return(VectorView(v_,i+stride()*start,i+stride()*stop)); + } + + /** @brief Create a constant column view vector + * @tparam CS column stride + * @param i column index + * @param start Start index in column + * @return column view vector + * + */ + template + const VectorView col(const index_t 
i,const index_t start=0) const + { + return(VectorView(v_,i+stride()*start,i+stride()*rows())); + } + + /** @brief Create a constant column view vector + * @tparam CS column stride + * @param i column index + * @param start Start index in column + * @param stop Stop index in column + * @return column view vector + * + */ + template + const VectorView col(const index_t i,const index_t start,const index_t stop) const + { + return(VectorView(v_,i+stride()*start,i+stride()*stop)); + } + + #if defined(HAS_VECTOR) + //! Type of vectors for a vector architecture and for scalar datatype P + using VectorType = typename vector_traits::vector; + + /** + * @brief %Vector store at a given row,column position + * + * @param row row index + * @param col column index + * @param val %Vector value + * + * On an architecture supporting vectors, if the scalar datatype T + * has a corresponding vector datatype, this function stores a vector + * value at row,column in this matrix. + */ + void matrix_store(const index_t row, + const index_t col, + const VectorType val) const + { + inner::vstore1<1>((typename std::remove_cv::type*)(&v_[row*stride() + col]),val); + } + +#if defined(HAS_PREDICATED_LOOP) + /** + * @brief %Vector store at a given row,column position with predicated tail + * + * @param row row index + * @param col column index + * @param remaining Number of remaining samples in the loop + * @param val Vector value to write at index i with tail predication + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function stores a vector value at row,column index in this matrix datatype + * with predication + */ + void matrix_store_tail(const index_t row, + const index_t col, + const vector_length_t remaining, + const VectorType val) const + { + inner::vstore1_z<1>((typename std::remove_cv::type*)(&v_[row*stride() + col]),val,remaining,inner::vctpq::mk(remaining)); + } + + /** + * @brief 
%Vector operation at a given row,column position with predicated tail + * + * @param row row index + * @param col column index + * @param remaining Number of remaining samples in the loop + * @return the vector result of the operation + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function compute an operation at row,column index in this matrix datatype + * with predication + */ + VectorType const matrix_op_tail(const index_t row, + const index_t col, + const vector_length_t remaining) const + { + return(inner::vload1_z<1>((typename std::remove_cv::type*)(&v_[row*stride() + col]),remaining,inner::vctpq::mk(remaining))); + } +#endif + + /** + * @brief %Vector operation at a given row,column position + * + * @param row row index + * @param col column index + * @return the vector result of the operation + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function compute an operation at row,column index in this matrix datatype + */ + VectorType const matrix_op(const index_t row, + const index_t col) const + { + return(inner::vload1<1>((typename std::remove_cv::type*)(&v_[row*stride() + col]))); + } +#endif + + /** @brief Fill diagonal of a matrix with a vector + * @tparam VA Vector datatype + * @param a Vector for initializing the diagonal + * + */ + template::value && + SameElementType::value,bool>::type = true> + void fill_diagonal(const VA& a) + { + _fill_diagonal(*this,a,this->length()); + } + + /** @brief Create the transposed matrix + * @return a matrix + * + */ + Matrix transpose() const + { + Matrix res(columns(),rows()); + transposeTo(res,*this); + return(res); + } + + /** @brief Create a matrix of same type + * @return a matrix + * + */ + Matrix create() const + { + Matrix res(rows(),columns()); + return(res); + } + + /** + * @brief Pointer to storage buffer + * @return 
Pointer to storage + */ + T* ptr() const {return(v_);} + + /** + * @brief Constant pointer to storage buffer + * @return Pointer to storage + */ + const T* const_ptr() const {return(v_);} + +protected: + T* const v_; + const vector_length_t nb_rows_; + const vector_length_t nb_cols_; +}; + +/* + +When the stride is not known at build time AND different +from the nb_cols_ + +*/ + +/** @brief Dynamic Matrix View + * @tparam T Type of the scalar + * + * This template is used for dynamic matrix (stride not known + * at build time) and when we do not know if stride == number of + * columns. + * When stride is different from number of columns, the matrix cannot + * be seen as a vector. + */ +template +struct MatrixView +{ + /** @brief Number of rows + * @return Number of rows + */ + vector_length_t rows() const {return(nb_rows_);} + + /** @brief Number of columns + * @return Number of columns + */ + vector_length_t columns() const {return(nb_cols_);} + + /** @brief Number of stride + * @return Number of stride + */ + uint32_t stride() const {return(stride_);} + + /** @brief Create matrix view on a buffer (buffer not owned by the view) + * @param v buffer + * @param rows number of rows + * @param cols number of columns + * @param stride stride + */ + explicit MatrixView(T* v, + const vector_length_t rows, + const vector_length_t cols, + const uint32_t stride): + v_(v),nb_rows_(rows),nb_cols_(cols),stride_(stride){}; + + /** @brief Create matrix view on vector (vector not owned by the view) + * @param v vector + * @param rows number of rows + * @param cols number of columns + * @param stride stride + */ + explicit MatrixView(const Vector_Base &v, + const vector_length_t rows, + const vector_length_t cols, + const uint32_t stride): + v_(v.ptr()),nb_rows_(rows),nb_cols_(cols),stride_(stride){}; + + virtual ~MatrixView() {}; + + MatrixView(const MatrixView& other): + v_(other.v_), + nb_rows_(other.nb_rows_),nb_cols_(other.nb_cols_),stride_(other.stride_){}; + + 
MatrixView(MatrixView&& other) : + v_(other.v_), + nb_rows_(other.nb_rows_),nb_cols_(other.nb_cols_),stride_(other.stride_){}; + + + MatrixView& operator=(const MatrixView& other) = delete; + MatrixView& operator=(MatrixView&& other) = delete; + + /** @brief Access matrix view element at given position + * @param r Row index + * @param c Column index + * @return reference to element + * + */ + T& operator()(const index_t r,const index_t c) + { + return(v_[r*stride()+c]); + } + + /** @brief Access matrix view element at given position + * @param r Row index + * @param c Column index + * @return reference to element + * + */ + T const operator()(const index_t r,const index_t c) const + { + return(v_[r*stride()+c]); + } + + + /** @brief Assign matrix view from expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other Expression + * @return the matrix + * + */ + template + MatrixView& operator=(const _Expr&other) + { + eval2D(*this,other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + } + + /** @brief Assign matrix view from constant + * @param val The constant + * @return the matrix + * + */ + MatrixView& operator=(const T val) + { + _Fill2D(*this,val,rows(),columns(),CURRENT_ARCH); + + return(*this); + } + + + /** @brief Add matrix from expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other Expression + * @return the matrix + * + */ + template + MatrixView& operator +=(const _Expr& other) + { + eval2D(*this,*this + other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Add matrix from matrix view + * @param other Other matrix + * @return the matrix + * + */ + MatrixView& operator +=(const MatrixView& other) + { + eval2D(*this,*this + other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Add constant to matrix view + * @param other The constant + * @return the matrix + * + */ + MatrixView& 
operator +=(const T other) + { + eval2D(*this,*this + other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Subtract matrix from expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other expression + * @return the matrix + * + */ + template + MatrixView& operator -=(const _Expr& other) + { + eval2D(*this,*this - other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Subtract matrix view + * @param other Other matrix view + * @return the matrix + * + */ + MatrixView& operator -=(const MatrixView& other) + { + eval2D(*this,*this - other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Subtract constant + * @param other Other matrix + * @return the matrix + * + */ + MatrixView& operator -=(const T other) + { + eval2D(*this,*this - other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Elementwise multiply matrix view with expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other expression + * @return the matrix + * + */ + template + MatrixView& operator *=(const _Expr& other) + { + eval2D(*this,*this * other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Elementwise multiply matrix view with matrix view + * @param other Other matrix + * @return the matrix + * + */ + MatrixView& operator *=(const MatrixView& other) + { + eval2D(*this,*this * other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Elementwise multiply matrix view constant + * @param other constant + * @return the matrix + * + */ + MatrixView& operator *=(const T other) + { + eval2D(*this,*this * other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Display the matrix content for debug purpose + * @param stream Output stream + * @param other The matrix to display + * @return the stream + * + */ + friend std::ostream& operator<< 
(std::ostream& stream, const MatrixView& other) { + for(index_t row=0;row row(const index_t i,const index_t start=0) + { + return(VectorView(v_,i*stride()+start,i*stride()+columns())); + } + + /** @brief Create a row view with stride 1 + * @param i row index + * @param start Start index in row + * @param stop Stop index in row + * @return row view vector + * + */ + VectorView row(const index_t i,const index_t start,const index_t stop) + { + return(VectorView(v_,i*stride()+start,i*stride()+stop)); + } + + /** @brief Create a constant row view with stride 1 + * @param i row index + * @param start Start index in row + * @return row view vector + * + */ + const VectorView row(const index_t i,const index_t start=0) const + { + return(VectorView(v_,i*stride()+start,i*stride()+columns())); + } + + /** @brief Create a constant row view with stride 1 + * @param i row index + * @param start Start index in row + * @param stop Stop index in row + * @return row view vector + * + */ + const VectorView row(const index_t i,const index_t start,const index_t stop) const + { + return(VectorView(v_,i*stride()+start,i*stride()+stop)); + } + + + /** @brief Create a column view vector + * @tparam CS column stride + * @param i column index + * @param start Start index in column + * @return column view vector + * + */ + template + VectorView col(const index_t i,const index_t start=0) + { + return(VectorView(v_,i+stride()*start,i+stride()*rows(),stride()*CS)); + } + + + /** @brief Create a column view vector + * @tparam CS column stride + * @param i column index + * @param start Start index in column + * @param stop Stop index in column + * @return column view vector + * + */ + template + VectorView col(const index_t i,const index_t start,const index_t stop) + { + return(VectorView(v_,i+stride()*start,i+stride()*stop,stride()*CS)); + } + + /** @brief Create a constant column view vector + * @tparam CS column stride + * @param i column index + * @param start Start index in column + * @return 
column view vector + * + */ + template + const VectorView col(const index_t i,const index_t start=0) const + { + return(VectorView(v_,i+stride()*start,i+stride()*rows(),stride()*CS)); + } + + /** @brief Create a constant column view vector + * @tparam CS column stride + * @param i column index + * @param start Start index in column + * @param stop Stop index in column + * @return column view vector + * + */ + template + const VectorView col(const index_t i,const index_t start,const index_t stop) const + { + return(VectorView(v_,i+stride()*start,i+stride()*stop,stride()*CS)); + } + + #if defined(HAS_VECTOR) + //! Type of vectors for a vector architecture and for scalar datatype P + using VectorType = typename vector_traits::vector; + + + /** + * @brief %Vector store at a given row,column position + * + * @param row row index + * @param col column index + * @param val %Vector value + * + * On an architecture supporting vectors, if the scalar datatype T + * has a corresponding vector datatype, this function stores a vector + * value at row,column in this matrix. 
+ */ + void matrix_store(const index_t row, + const index_t col, + const VectorType val) const + { + inner::vstore1<1>((typename std::remove_cv::type*)(&v_[row*stride() + col]),val); + } + +#if defined(HAS_PREDICATED_LOOP) + /** + * @brief %Vector store at a given row,column position with predicated tail + * + * @param row row index + * @param col column index + * @param remaining Number of remaining samples in the loop + * @param val Vector value to write at index i with tail predication + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function stores a vector value at row,column index in this matrix datatype + * with predication + */ + void matrix_store_tail(const index_t row, + const index_t col, + const vector_length_t remaining, + const VectorType val) const + { + inner::vstore1_z<1>((typename std::remove_cv::type*)(&v_[row*stride() + col]),val,remaining,inner::vctpq::mk(remaining)); + } + + /** + * @brief %Vector operation at a given row,column position with predicated tail + * + * @param row row index + * @param col column index + * @param remaining Number of remaining samples in the loop + * @return the vector result of the operation + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function compute an operation at row,column index in this matrix datatype + * with predication + */ + VectorType const matrix_op_tail(const index_t row, + const index_t col, + const vector_length_t remaining) const + { + return(inner::vload1_z<1>((typename std::remove_cv::type*)(&v_[row*stride() + col]),remaining,inner::vctpq::mk(remaining))); + } +#endif + + /** + * @brief %Vector operation at a given row,column position + * + * @param row row index + * @param col column index + * @return the vector result of the operation + * + * On an architecture supporting vectors and predicated loops, if 
the + * scalar datatype T has a corresponding vector datatype, this + * function compute an operation at row,column index in this matrix datatype + */ + VectorType const matrix_op(const index_t row, + const index_t col) const + { + return(inner::vload1<1>((typename std::remove_cv::type*)(&v_[row*stride() + col]))); + } +#endif + + /** @brief Fill diagonal of a matrix with a vector + * @tparam VA Vector datatype + * @param a Vector for initializing the diagonal + * + */ + template::value && + SameElementType::value,bool>::type = true> + void fill_diagonal(const VA& a) + { + _fill_diagonal(*this,a,this->length()); + } + + /** @brief Create the transposed matrix + * @return a matrix + * + */ + Matrix transpose() const + { + Matrix res(columns(),rows()); + transposeTo(res,*this); + return(res); + } + + /** @brief Create a matrix of same type + * @return a matrix + * + */ + Matrix create() const + { + Matrix res(rows(),columns()); + return(res); + } + + /** + * @brief Pointer to storage buffer + * @return Pointer to storage + */ + T* ptr() const {return(v_);} + + /** + * @brief Constant pointer to storage buffer + * @return Pointer to storage + */ + const T* const_ptr() const {return(v_);} + + +protected: + T* const v_; + const vector_length_t nb_rows_; + const vector_length_t nb_cols_; + const uint32_t stride_; +}; + +/* + + +Dynamic but with stride == nb_cols_ + +*/ + +/** @brief Dynamic Matrix View + * @tparam T Type of the scalar + * + * This template is used for dynamic matrix (stride not known + * at build time) and when we do know that stride == number of + * columns. + * When stride is equal to the number of columns, the matrix can + * be seen as a vector and it enables to use the vector eval loop + * in the operator fusion mechanism. + * + * Those matrix views are created by expression when a reference to + * a matrix is used in the expression tree (to avoid copying the matrix). 
+ * In this case, we do know that the matrix view is the full matrix and thus + * stride == number of columns + */ +template +struct MatrixView:VectorView +{ + /** @brief Number of rows + * @return Number of rows + */ + vector_length_t rows() const {return(nb_rows_);} + + /** @brief Number of columns + * @return Number of columns + */ + vector_length_t columns() const {return(nb_cols_);} + + /** @brief Number of stride + * @return Number of stride + */ + uint32_t stride() const {return(nb_cols_);} + + /** @brief Create matrix view on a buffer (buffer not owned by the view) + * @param v buffer + * @param rows number of rows + * @param cols number of columns + */ + explicit MatrixView(T* v, + const vector_length_t rows, + const vector_length_t cols): + VectorView(v,0,rows*cols), + nb_rows_(rows),nb_cols_(cols){}; + + /** @brief Create matrix view on vector (vector not owned by the view) + * @param v vector + * @param rows number of rows + * @param cols number of columns + */ + explicit MatrixView(const Vector_Base &v, + const vector_length_t rows, + const vector_length_t cols): + VectorView(v.ptr(),0,rows*cols), + nb_rows_(rows),nb_cols_(cols){}; + + virtual ~MatrixView() {}; + + MatrixView(const MatrixView& other): + VectorView(other), + nb_rows_(other.nb_rows_),nb_cols_(other.nb_cols_){}; + + MatrixView(MatrixView&& other) : + VectorView(std::forward(other)), + nb_rows_(other.nb_rows_),nb_cols_(other.nb_cols_){}; + + + MatrixView& operator=(const MatrixView& other) = delete; + MatrixView& operator=(MatrixView&& other) = delete; + + /** @brief Access matrix view element at given position + * @param r Row index + * @param c Column index + * @return reference to element + * + */ + T& operator()(const index_t r,const index_t c) + { + return(&(*this)[r*stride()+c]); + } + + /** @brief Access matrix view element at given position + * @param r Row index + * @param c Column index + * @return reference to element + * + */ + T const operator()(const index_t r,const index_t 
c) const + { + return((*this)[r*stride()+c]); + } + + /** @brief Assign matrix view from expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other Expression + * @return the matrix + * + */ + template + MatrixView& operator=(const _Expr&other) + { + eval2D(*this,other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + } + + /** @brief Assign matrix view from constant + * @param val The constant + * @return the matrix + * + */ + MatrixView& operator=(const T val) + { + _Fill2D(*this,val,rows(),columns(),CURRENT_ARCH); + + return(*this); + } + + + /** @brief Add matrix from expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other Expression + * @return the matrix + * + */ + template + MatrixView& operator +=(const _Expr& other) + { + eval2D(*this,*this + other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Add matrix from matrix view + * @param other Other matrix + * @return the matrix + * + */ + MatrixView& operator +=(const MatrixView& other) + { + eval2D(*this,*this + other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Add constant to matrix view + * @param other The constant + * @return the matrix + * + */ + MatrixView& operator +=(const T other) + { + eval2D(*this,*this + other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Subtract matrix from expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other expression + * @return the matrix + * + */ + template + MatrixView& operator -=(const _Expr& other) + { + eval2D(*this,*this - other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Subtract matrix view + * @param other Other matrix view + * @return the matrix + * + */ + MatrixView& operator -=(const MatrixView& other) + { + eval2D(*this,*this - other,rows(),columns(),CURRENT_ARCH); + 
return(*this); + }; + + /** @brief Subtract constant + * @param other Other matrix + * @return the matrix + * + */ + MatrixView& operator -=(const T other) + { + eval2D(*this,*this - other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Elementwise multiply matrix view with expression + * @tparam Derived Datatype representing the abstract syntax tree of the expression + * @param other expression + * @return the matrix + * + */ + template + MatrixView& operator *=(const _Expr& other) + { + eval2D(*this,*this * other.derived(),rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Elementwise multiply matrix view with matrix view + * @param other Other matrix + * @return the matrix + * + */ + MatrixView& operator *=(const MatrixView& other) + { + eval2D(*this,*this * other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** @brief Elementwise multiply matrix view constant + * @param other constant + * @return the matrix + * + */ + MatrixView& operator *=(const T other) + { + eval2D(*this,*this * other,rows(),columns(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Display the matrix content for debug purpose + * @param stream Output stream + * @param other The matrix to display + * @return the stream + * + */ + friend std::ostream& operator<< (std::ostream& stream, const MatrixView& other) { + for(index_t row=0;row row(const index_t i,const index_t start=0) + { + return(VectorView(this->ptr(),i*stride()+start,i*stride()+columns())); + } + + /** @brief Create a row view with stride 1 + * @param i row index + * @param start Start index in row + * @param stop Stop index in row + * @return row view vector + * + */ + VectorView row(const index_t i,const index_t start,const index_t stop) + { + return(VectorView(this->ptr(),i*stride()+start,i*stride()+stop)); + } + + /** @brief Create a constant row view with stride 1 + * @param i row index + * @param start Start index in row + * @return row view vector + * + */ + const 
VectorView row(const index_t i,const index_t start=0) const + { + return(VectorView(this->ptr(),i*stride()+start,i*stride()+columns())); + } + + /** @brief Create a constant row view with stride 1 + * @param i row index + * @param start Start index in row + * @param stop Stop index in row + * @return row view vector + * + */ + const VectorView row(const index_t i,const index_t start,const index_t stop) const + { + return(VectorView(this->ptr(),i*stride()+start,i*stride()+stop)); + } + + /** @brief Create a column view vector + * @tparam CS column stride + * @param i column index + * @param start Start index in column + * @return column view vector + * + */ + template + VectorView col(const index_t i,const index_t start=0) + { + return(VectorView(this->ptr(),i+stride()*start,i+stride()*rows(),stride()*CS)); + } + + /** @brief Create a column view vector + * @tparam CS column stride + * @param i column index + * @param start Start index in column + * @param stop Stop index in column + * @return column view vector + * + */ + template + VectorView col(const index_t i,const index_t start,const index_t stop) + { + return(VectorView(this->ptr(),i+stride()*start,i+stride()*stop,stride()*CS)); + } + + /** @brief Create a constant column view vector + * @tparam CS column stride + * @param i column index + * @param start Start index in column + * @return column view vector + * + */ + template + const VectorView col(const index_t i,const index_t start=0) const + { + return(VectorView(this->ptr(),i+stride()*start,i+stride()*rows(),stride()*CS)); + } + + /** @brief Create a constant column view vector + * @tparam CS column stride + * @param i column index + * @param start Start index in column + * @param stop Stop index in column + * @return column view vector + * + */ + template + const VectorView col(const index_t i,const index_t start,const index_t stop) const + { + return(VectorView(this->ptr(),i+stride()*start,i+stride()*stop,stride()*CS)); + } + + #if defined(HAS_VECTOR) + 
//! Type of vectors for a vector architecture and for scalar datatype P + using VectorType = typename vector_traits::vector; + + /** + * @brief %Vector store at a given row,column position + * + * @param row row index + * @param col column index + * @param val %Vector value + * + * On an architecture supporting vectors, if the scalar datatype T + * has a corresponding vector datatype, this function stores a vector + * value at row,column in this matrix. + */ + void matrix_store(const index_t row, + const index_t col, + const VectorType val) const + { + inner::vstore1<1>((typename std::remove_cv::type*)(ptr(row*stride() + col)),val); + } + +#if defined(HAS_PREDICATED_LOOP) + /** + * @brief %Vector store at a given row,column position with predicated tail + * + * @param row row index + * @param col column index + * @param remaining Number of remaining samples in the loop + * @param val Vector value to write at index i with tail predication + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function stores a vector value at row,column index in this matrix datatype + * with predication + */ + void matrix_store_tail(const index_t row, + const index_t col, + const vector_length_t remaining, + const VectorType val) const + { + inner::vstore1_z<1>((typename std::remove_cv::type*)(ptr(row*stride() + col)),val,remaining,inner::vctpq::mk(remaining)); + } + + /** + * @brief %Vector operation at a given row,column position with predicated tail + * + * @param row row index + * @param col column index + * @param remaining Number of remaining samples in the loop + * @return the vector result of the operation + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function compute an operation at row,column index in this matrix datatype + * with predication + */ + VectorType const matrix_op_tail(const 
index_t row, + const index_t col, + const vector_length_t remaining) const + { + return(inner::vload1_z<1>((typename std::remove_cv::type*)(VectorView::ptr(row*stride() + col)),remaining,inner::vctpq::mk(remaining))); + } +#endif + + /** + * @brief %Vector operation at a given row,column position + * + * @param row row index + * @param col column index + * @return the vector result of the operation + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function compute an operation at row,column index in this matrix datatype + */ + VectorType const matrix_op(const index_t row, + const index_t col) const + { + return(inner::vload1<1>((typename std::remove_cv::type*)(VectorView::ptr(row*stride() + col)))); + } +#endif + + /** @brief Fill diagonal of a matrix with a vector + * @tparam VA Vector datatype + * @param a Vector for initializing the diagonal + * + */ + template::value && + SameElementType::value,bool>::type = true> + void fill_diagonal(const VA& a) + { + _fill_diagonal(*this,a,this->length()); + } + + /** @brief Create the transposed matrix + * @return a matrix + * + */ + Matrix transpose() const + { + Matrix res(columns(),rows()); + transposeTo(res,*this); + return(res); + } + + /** @brief Create a matrix of same type + * @return a matrix + * + */ + Matrix create() const + { + Matrix res(rows(),columns()); + return(res); + } + + +protected: + const vector_length_t nb_rows_; + const vector_length_t nb_cols_; +}; + +/*! 
@} */ + +} \ No newline at end of file diff --git a/dsppp/Include/dsppp/memory_pool.hpp b/dsppp/Include/dsppp/memory_pool.hpp new file mode 100644 index 00000000..a152d691 --- /dev/null +++ b/dsppp/Include/dsppp/memory_pool.hpp @@ -0,0 +1,380 @@ +// -*- C++ -*- +/** @file */ +#pragma once + + +#include +#include +#include +#include "common.hpp" + +namespace arm_cmsis_dsp { + +/** \addtogroup MEMORY Memory allocator + * \ingroup DSPPP + * @{ + */ + +/* + +Buffer allocator + +Can be used to build memory allocators foe vector +and matrix. + +For instance, it is usedin the Memory pool allocator + +*/ + +/** \defgroup MEMBUF Memory buffer allocator + * \ingroup MEMORY + * Allocators for allocating memory buffers + */ + +/** \defgroup MEMVEC Vector / matrix buffer allocator + * \ingroup MEMORY + * Allocators for allocating vector / matrix buffers + */ + +/** \defgroup MEMTOOL Miscellaneous utilities for memory + * \ingroup MEMORY + * Miscellaneous utilities for implementing memory allocators + */ + +/** + * @ingroup MEMBUF + * @brief Malloc memory allocator + * + */ +struct default_user_allocator_malloc_free +{ + /** + * @brief Allocate a buffer + * + * @param[in] bytes The bytes + * + * @return A pointer to the allocated buffer + */ + static char * malloc(const std::size_t bytes) + { + #if !defined(MEMORY_ALLOCATION_DEBUG) + return reinterpret_cast(std::malloc(bytes)); + #else + char *ret=reinterpret_cast(std::malloc(bytes)); + if (ret==nullptr) + { + std::cout << "out of memory for " << bytes << " bytes\r\n"; + } + return(ret); + #endif + } + + /** + * @brief Free a buffer + * + * @param block The buffer to free + */ + static void free(char * const block) + { + #if defined(MEMORY_ALLOCATION_DEBUG) + if (block==nullptr) + { + std::cout << "free null ptr \r\n"; + } + #endif + std::free(block); + } +}; + +/** + * @ingroup MEMBUF + * @brief Aligned memory allocation + * + * @param[in] alignment The alignment of the buffer + * @param[in] size The size of the buffer + * + * 
@return A pointer to the new buffer + */ +inline void* aligned_malloc(std::size_t alignment, std::size_t size) +{ + void *ptr=std::malloc(size+alignment+sizeof(void*)); + void *aligned = + reinterpret_cast( + (reinterpret_cast(ptr)+sizeof(void*)+alignment) & ~(alignment-1) + ); + + *(static_cast(aligned) - 1) = ptr; + return(aligned); +} + +/** + * @ingroup MEMBUF + * @brief Free an aligned buffer + * + * @param ptr The pointer + */ +inline void +aligned_free(void* ptr) +{ + if (ptr) { + std::free(*(static_cast(ptr) - 1)); + } +}; + +/** + * @ingroup MEMBUF + * @brief Memory allocation for aligned buffers + * + */ +struct user_allocator_aligned_malloc +{ + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + /** + * @brief Allocate a new buffer + * + * @param[in] bytes The bytes + * + * @return Pointer to the new buffer + */ + static char * malloc(const size_type bytes) + { + #if !defined(MEMORY_ALLOCATION_DEBUG) + return reinterpret_cast(aligned_malloc(MEMORY_POOL_ALIGNMENT, bytes)); + #else + char *ret = reinterpret_cast(aligned_malloc(MEMORY_POOL_ALIGNMENT, bytes)); + if (ret==nullptr) + { + std::cout << "out of memory for " << bytes << " bytes\r\n"; + } + return(ret); + #endif + } + + /** + * @brief Free a buffer + * + * @param block Pointer to the buffer + */ + static void free(char * const block) + { + #if defined(MEMORY_ALLOCATION_DEBUG) + if (block==nullptr) + { + std::cout << "free null ptr \r\n"; + } + #endif + aligned_free(block); + } +}; + +/* + +Memory allocator for vector and matrix. 
+ +*/ + +// Default allocator +// Other allocator must be provided by user of the library + +/** + * @ingroup MEMVEC + * @brief Default memory allocator for vectors and matrixes + * + * @tparam L Size known at build time in bytes + */ +template +struct malloc_allocator { + /** + * @brief Allocate a buffer with size known at runtime + * + * @param[in] sz The size + * + * @return Pointer to the buffer + */ + static char* allocate ( vector_length_t sz) noexcept{ + char *res; + res=reinterpret_cast(std::malloc(sz)); + #if defined(MEMORY_ALLOCATION_DEBUG) + if (res==nullptr) + { + std::cout << "out of memory for " << sz << " bytes\r\n"; + } + #endif + return(res); + } + + /** + * @brief Allocate a buffer with size known at build time + * + * @return Pointer to the buffer + */ + static char* allocate ( ) noexcept{ + char *res; + res=reinterpret_cast(std::malloc(L)); + #if defined(MEMORY_ALLOCATION_DEBUG) + if (res==nullptr) + { + std::cout << "out of memory for " << L << " bytes\r\n"; + } + #endif + return(res); + } + + /** + * @brief Destroys the given pointer. + * + * @param ptr The pointer + */ + static void destroy ( char* ptr ) noexcept { + #if defined(MEMORY_ALLOCATION_DEBUG) + if (ptr==nullptr) + { + std::cout << "free null ptr \r\n"; + } + #endif + std::free(ptr); + } + +}; + + +/* + +Memory pool + +Memory pool is using a buffer +allocator (aligned or normal malloc) + +A memory pool can be used to by a memory allocator for +vectors and matrixes. 
+ + +*/ + +struct ListElem; + +/** + * @ingroup MEMTOOL + * @brief Simple list of elements + * + */ +struct ListElem { + ListElem *next; +}; + +/** + * @ingroup MEMTOOL + * @brief This class describes a memory pool that can be used to build + * a memory allocator for vectors and matrixes + * + * @tparam BUF_SIZE Size of a buffer known at build time + * @tparam UserAllocator Memory allocator to allocate the memory buffer + */ +template +class MemoryPool { +public: + /** + * @brief Create a new memory pool + * + * @param[in] nbBufs The number of buffers to pre-allocate + */ + explicit MemoryPool(const uint16_t nbBufs) + { + buffer_list.reserve(nbBufs); + buffer_list.assign(nbBufs,nullptr); + for(auto p=buffer_list.begin();p != buffer_list.end(); ++p) + { + *p = UserAllocator::malloc(BUF_SIZE < sizeof(ListElem) ? sizeof(ListElem) : BUF_SIZE); + } + reset(); + }; + + /** + * @brief Destroys the object. + */ + ~MemoryPool() + { + for(auto p=buffer_list.begin();p != buffer_list.end(); ++p) + { + UserAllocator::free(*p); + } + } + + MemoryPool(const MemoryPool& other) = delete; + + MemoryPool(MemoryPool&& other) = delete; + + + MemoryPool& operator=(const MemoryPool& other) = delete; + + MemoryPool& operator=(MemoryPool&& other) = delete; + + /** + * @brief Gets the new free buffer. + * + * @return The new buffer. + */ + char* get_new_buffer() noexcept + { + /* No error handling. + The sizing of the pool must have been done, for + instance, with a statistic allocator. 
+ Allocation is thus assumed to succeed */ + char* res = reinterpret_cast(free); + free = free->next; + #if defined(MEMORY_ALLOCATION_DEBUG) + if (res == nullptr) + { + std::cout << "memory pool alloc error " << BUF_SIZE << " bytes\r\n"; + } + #endif + return(res); + } + + /** + * @brief Release the buffer so that it can be reused + * + * @param buf The buffer + */ + void recycle_buffer(char* buf) noexcept + { + ListElem *l = reinterpret_cast(buf); + #if defined(MEMORY_ALLOCATION_DEBUG) + if (l == nullptr) + { + std::cout << "memory pool free error " << BUF_SIZE << " bytes\r\n"; + } + #endif + l->next = free; + free = l; + } + + /** + * @brief Release all the buffers so that they can be reused + */ + void reset() noexcept + { + const int nbBufs = buffer_list.size(); + for(int i=0;i(buffer_list[i]); + l->next = reinterpret_cast(buffer_list[i+1]); + } + ListElem *l=reinterpret_cast(buffer_list[nbBufs-1]); + l->next = nullptr; + free = reinterpret_cast(buffer_list[0]); + } + + + +protected: + ListElem *free; + std::vector buffer_list; +}; + + +/*! @} */ + +} \ No newline at end of file diff --git a/dsppp/Include/dsppp/num_features/double.hpp b/dsppp/Include/dsppp/num_features/double.hpp new file mode 100644 index 00000000..a6d22a69 --- /dev/null +++ b/dsppp/Include/dsppp/num_features/double.hpp @@ -0,0 +1,148 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup GenericNumber Scalar number definitions + * \ingroup NUMBER + * @{ + * \addtogroup GenericDoubleNumber Double + * \ingroup GenericNumber + * @{ + */ + + +/** + * @brief Features for double + */ +template<> +struct number_traits +{ + //! It is a float number + static constexpr bool is_float = true; + + //! It is not a fixed point + static constexpr bool is_fixed = false; + + //! 
Accumulator datatype for this scalar datatype + typedef double accumulator; + + /** + * @brief One for this datatype + * + * @return Return 1 representation for this datatype + */ + static constexpr double one() {return 1.0;}; + + //! Compute datatype for this scalar datatype + typedef double compute_type; +}; + +/** + * @brief Default vector datatype description for this scalar datatype + * + * @tparam arch Current architecture + */ +template +struct vector_traits { + + /** + * Scalar datatype + */ + typedef double type; + + /** + * Storage datatype + */ + typedef double storage_type; + + // No vector type but must still be defined + + /** + * Dummy datatype. Must be present for building but not used + * since by default there is no vector architecture assumed + */ + typedef bool vector; + + /** + * Dummy datatype. Must be present for building but not used + * since by default there is no vector architecture assumed + */ + typedef bool temp_accumulator; + + /** + * Dummy datatype. Must be present for building but not used + * since by default there is no vector architecture assumed + */ + typedef uint32_t predicate_t; + + /** + * By default : no vector architecture assumed + */ + static constexpr bool has_vector = false; + + //! It is a float + static constexpr bool is_float = true; + //! Not a fixed point + static constexpr bool is_fixed = false; + //! 
No predicated loops + static constexpr bool has_predicate = false; +}; + +/** + * Inner implementation of generic intrinsics + * \ingroup GenericNumber + */ +namespace inner { + /** + * @brief Convert from accumulator representation + * + * @param[in] a Value + * + * @return Accumulator value converted to current datatype + */ + __STATIC_FORCEINLINE double from_accumulator(const double a) + { + return(a); + }; + +/** + * @brief Multiply and accumulate for this datatype + * + * @param[in] acc The accumulated value + * @param[in] a The left hand side + * @param[in] b The right hand side + * + * @return Return acc + a*b + */ + __STATIC_FORCEINLINE double mac(const double acc,const double a,const double b) + { + return(acc+a*b); + }; + +/** + * @brief Accumulate + * + * @param a Accumulator + * @param[in] b VAlue to be added + */ + __STATIC_FORCEINLINE void accumulate(double &a,const double &b) +{ + a += b; +} + +/** + * @brief Multiply + * + * @param a Left hand side + * @param[in] b Right hand side + * + * @return Return a*b + */ +__STATIC_FORCEINLINE double mult(double &a,const double &b) +{ + return(a*b); +} +} + +/*! @} */ +/*! @} */ diff --git a/dsppp/Include/dsppp/num_features/float.hpp b/dsppp/Include/dsppp/num_features/float.hpp new file mode 100644 index 00000000..6b4632b9 --- /dev/null +++ b/dsppp/Include/dsppp/num_features/float.hpp @@ -0,0 +1,146 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup GenericNumber + * \ingroup NUMBER + * @{ + * \addtogroup GenericFloatNumber Float + * \ingroup GenericNumber + * @{ + */ + +/** + * @brief Features for float + */ +template<> +struct number_traits +{ + //! It is a float number + static constexpr bool is_float = true; + + //! It is not a fixed point + static constexpr bool is_fixed = false; + + //! 
Accumulator datatype for this scalar datatype + typedef float accumulator; + + /** + * @brief One for this datatype + * + * @return Return 1 representation for this datatype + */ + static constexpr float one() {return 1.0f;}; + + //! Compute datatype for this scalar datatype + typedef float compute_type; +}; + + +/* + +If arch is not deriving from Neon or Helium, then there are +no vectors for float + +*/ + +/** + * @brief Vector instructions for float when no Helium or Neon + * + * @tparam arch Current architecture + */ +template +struct vector_traits::value && + !std::is_base_of::value>::type> { + + //! Current type + typedef float type; + + //! Current storage type + typedef float storage_type; + + // No vector type but must still be defined + + //! Dummy type. Not used when no vector instructions + typedef bool vector; + //! Dummy type. Not used when no vector instructions + typedef bool temp_accumulator; + //! Dummy type. Not used when no vector instructions + typedef uint32_t predicate_t; + + + //! No vector instructions for this datatype + static constexpr bool has_vector = false; + //! Is float + static constexpr bool is_float = true; + //! Is fixed + static constexpr bool is_fixed = false; + + //! 
No predicated loop + static constexpr bool has_predicate = false; + +}; + +/** + * Inner implementation of generic intrinsics + * \ingroup GenericNumber + */ +namespace inner { + /** + * @brief Convert from accumulator representtaion + * + * @param[in] a Value + * + * @return Accumulator value converted to current datatype + */ + __STATIC_FORCEINLINE float from_accumulator(const float a) + { + return(a); + }; + +/** + * @brief Scalar multiply and accumulate + * + * @param[in] acc Accumulator + * @param[in] a Operand + * @param[in] b Operand + * + * @return acc + a*b + */ + __STATIC_FORCEINLINE float mac(const float acc,const float a,const float b) + { + return(acc+a*b); + }; + +/** + * @brief Scalar accumulate + * + * @param a Accumulator + * @param[in] b Operand + */ +__STATIC_FORCEINLINE void accumulate(float &a,const float &b) +{ + a += b; +} + +/** + * @brief Scalar multiply + * + * @param a Operand + * @param[in] b Operand + * + * @return a*b + */ +__STATIC_FORCEINLINE float mult(float &a,const float &b) +{ + return(a*b); +} + +} + + +/*! @} */ +/*! @} */ + + diff --git a/dsppp/Include/dsppp/num_features/group.hpp b/dsppp/Include/dsppp/num_features/group.hpp new file mode 100644 index 00000000..4d21e76c --- /dev/null +++ b/dsppp/Include/dsppp/num_features/group.hpp @@ -0,0 +1,399 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup GenericNumber + * \ingroup NUMBER + * @{ + * \addtogroup GenericTUPLENumber Tuple + * Tuples of numbers or expressions used for unrolling + * \ingroup GenericNumber + * @{ + */ + +/** + * @brief Number description for a tuple of numbers + * + * @tparam E Datatype for all numbers + */ +template +struct number_traits> +{ + //! It is not a float number + static constexpr bool is_float = false; + + //! It is not a fixed point number + static constexpr bool is_fixed = false; + + //! Tuple of accumulator datatype for the accumulator type + typedef std::tuple::accumulator...> accumulator; + + //! 
Tuple of compute datatype for the compute type + typedef std::tuple::compute_type...> compute_type; + + /** + * @brief Return of tuples of one values + * + * @return Tuples of one values with different datatypes + */ + static std::tuple::accumulator...> one() + { + return(std::make_tuple(vector_traits::one()...)); + } + +}; + +/* + +Assume that all E are using the same scalar type or coherent types +like f32 and q13 that have same number of lanes. + +Any other mix will not work and won't be catched at build time. + +*/ + +/** + * @brief Tuple of compatible vectors + * + * @tparam arch Current architecture + * @tparam E List of vector dataypes + * + * The vector datatypes must be coherent : have same number of lanes + * or same lane datatype + */ +template +struct vector_traits,arch> { + + //! First element of tuple defines the scalar datatype + using RefScalar = typename std::tuple_element<0,std::tuple>::type; + + + //! Temporary accumulator datatype + typedef std::tuple::temp_accumulator...> temp_accumulator; + + //! Vector datatype + typedef std::tuple::vector...> vector; + + //! Predicate datatype + typedef std::tuple::predicate_t...> predicate_t; + + //! Number of lanes (from RefScalar) + static constexpr int nb_lanes = vector_traits::nb_lanes; + + //! Has vector instructions + static constexpr bool has_vector = vector_traits::has_vector; + + //! Is a float + static constexpr bool is_float = vector_traits::is_float; + + //! Is fixed point + static constexpr bool is_fixed = vector_traits::is_fixed; + + //! 
Has predicated loop + static constexpr bool has_predicate = vector_traits::has_predicate; + + /** + * @brief Zero represented with temp accumulator datatype + * + * @return Zero represented with temp accumulator datatype + */ + static temp_accumulator temp_acc_zero() + { + return(std::make_tuple(vector_traits::temp_acc_zero()...)); + } + +}; + +/** + * Inner implementation of generic intrinsics + * \ingroup GenericNumber + */ +namespace inner { + + + + /* + + Assume that the vctpq is the same for all tuple elements. + If it is not the case, we can't get a predicated loop and + the code contains additional VPSTTTT and it is not + efficient. + + */ +#if defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI) + /** + * @brief Tuple of predicates + * + * @tparam E Tuple of datatypes + * + * The datatypes must be coherent (same number of lanes). + * The first element is used to infer the vctpq instruction to use + */ + template + struct vctpq> + { + /** + * @brief Make a predicate for predicated loop + * + * @param[in] v Remaining number of iterations + * + * @return Predicate + */ + static auto mk(const uint32_t v/*, + typename std::enable_if<(vector_traits::nb_lanes == ...),bool>::type* = nullptr*/) + { + return(vctpq>>::mk(v)); + }; + }; +#endif + /* + + Typical configuration is vmacc between tuple and tuple + but also very common is vmacc between tuple and vector + + */ + + /** + * @brief Vector accumulate for tuples of vectors + * + * @param[in] acc The accumulator + * @param[in] a First operand + * @param[in] b Second operand + * + * @tparam A Accumulator datatype + * @tparam V Vector datatype + * @tparam Ns Tuple index + * + * @return tuple of results + */ + template + __STATIC_FORCEINLINE A vmacc_impl(const A &acc,const V &a,const V &b, std::index_sequence) + { + return(std::make_tuple(vmacc(std::get(acc),std::get(a),std::get(b))...)); + }; + + /** + * @brief Vector accumulate for tuples of vectors + * + * @param[in] acc The accumulator + * @param[in] a First operand + 
* @param[in] b Second operand + * + * @tparam A Accumulator datatype + * @tparam E Datatype of tuples elements + * + * @return Accumulator result + */ + template + __STATIC_FORCEINLINE A + vmacc(const A &acc,const std::tuple &a,const std::tuple &b) + { + return(vmacc_impl(acc,a,b,std::make_index_sequence())); + }; + + /** + * @brief Predicated vector accumulate for tuple + * + * @param[in] acc Accumulator + * @param[in] a First operand + * @param[in] b Second operand + * @param[in] p0 Predicate + * + * @tparam A Accumulator datatype + * @tparam V Vector datatype + * @tparam B Predicate datatype + * @tparam Ns Tuple indexes + * + * @return Tuple of accumulated values + */ + template + __STATIC_FORCEINLINE A vmacc_impl(const A &acc,const V &a,const V &b, const B p0,std::index_sequence) + { + return(std::make_tuple(vmacc(std::get(acc),std::get(a),std::get(b),p0)...)); + }; + + /** + * @brief Predicated vector accumulate for tuples + * + * @param[in] acc Accumulator + * @param[in] a First operand + * @param[in] b Second operand + * @param[in] p0 Predicate + * + * @tparam A Accumulator datatype + * @tparam B Predicate datatype + * @tparam E Dadatype of tuples elements + * + * @return Tuple of accumulated vectors + */ + template + __STATIC_FORCEINLINE A + vmacc(const A &acc,const std::tuple &a,const std::tuple &b,const B p0) + { + return(vmacc_impl(acc,a,b,p0,std::make_index_sequence())); + }; + + + + /** + * @brief Reduce function for tuple + * + * @param[in] acc Accumulator + * + * @tparam A Accumulator datatype + * @tparam Ns Tuple indexes + * + * @return Reduced accumulator values + * + * Some vector instructions sets cannot accumulate vectors + * into a scalar. They accumulate into this vector. + * This vector must be reduced to a scalar at the end of + * the accumulation loop. 
+ */ + template + __STATIC_FORCEINLINE auto vreduce_impl(const A &acc, std::index_sequence) + { + return(std::make_tuple(vreduce(std::get(acc))...)); + }; + +/** + * @brief Reduce function for tuples + * + * @param[in] acc The accumulator + * + * @tparam E Datatypes for tuples + * + * @return Tuples of reduced values + * + * Some vector instructions sets cannot accumulate vectors + * into a scalar. They accumulate into this vector. + * This vector must be reduced to a scalar at the end of + * the accumulation loop. + * + */ + template + __STATIC_FORCEINLINE auto vreduce(const std::tuple &acc) + { + return(vreduce_impl(acc,std::make_index_sequence())); + }; + + /** + * @brief Convert from accumulator value + * + * @param[in] acc The accumulator + * + * @tparam A Accumulator datatype + * @tparam Ns Tuples indexes + * + * @return Tuples of values + */ + template + __STATIC_FORCEINLINE auto from_accumulator_impl(const A &acc, std::index_sequence) + { + return(std::make_tuple(from_accumulator(std::get(acc))...)); + }; + + /** + * @brief Convert from tuple of accumulator values + * + * @param[in] acc Accumulator + * + * @tparam E Datatypes for tuple + * + * @return Tuples of converted accumulator values + * + * Accumulator may use more bits to avoid saturations. 
+ * At the end of the accumulation, the final result must + * be converted to the current datatype (it may implies saturation) + */ + template + __STATIC_FORCEINLINE auto from_accumulator(const std::tuple &acc) + { + return(from_accumulator_impl(acc,std::make_index_sequence())); + }; + + /** + * @brief Multiply accumulate for tuple of scalar + * + * @param[in] acc Accumulator + * @param[in] a First operand + * @param[in] b Second operand + * + * @tparam A Accumulator datatype + * @tparam V Scalar datatype + * @tparam Ns Tuple indexes + * + * @return Tuples of accumulated values + */ + template + __STATIC_FORCEINLINE A mac_impl(const A &acc,const V &a,const V &b, std::index_sequence) + { + return(std::make_tuple(mac(std::get(acc),std::get(a),std::get(b))...)); + }; + +/** + * @brief Multiply accumulate + * + * @param[in] acc Accumulator + * @param[in] a First operand + * @param[in] b Second operand + * + * @tparam A Accumulator datatype + * @tparam E Datatypes for tuple + * + * @return Accumulated values + */ + template + __STATIC_FORCEINLINE A + mac(const A &acc,const std::tuple &a,const std::tuple &b) + { + return(mac_impl(acc,a,b,std::make_index_sequence())); + }; + +/** + * @brief Multiply accumulate for tuple of scalar + * + * @param[in] acc Accumulator + * @param[in] a First operand + * @param[in] b Second operand + * @param[in] p0 Predicate + * + * @tparam A Accumulator datatype + * @tparam V Scalar datatype + * @tparam B Predicate datatype + * @tparam Ns Tuple indexes + * + * @return Tuples of accumulated values + */ + template + __STATIC_FORCEINLINE A mac_impl(const A &acc,const V &a,const V &b, const B p0,std::index_sequence) + { + return(std::make_tuple(mac(std::get(acc),std::get(a),std::get(b),p0)...)); + }; + +/** + * @brief Multiply accumulate + * + * @param[in] acc Accumulator + * @param[in] a First operand + * @param[in] b Second operand + * @param[in] p0 Predicate + * + * @tparam A Accumulator datatype + * @tparam B Predicate datatype + * @tparam E 
Datatypes for tuple + * + * @return Accumulated values + */ + template + __STATIC_FORCEINLINE A + mac(const A &acc,const std::tuple &a,const std::tuple &b,const B p0) + { + return(mac_impl(acc,a,b,p0,std::make_index_sequence())); + }; + +}; + + +/*! @} */ +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/num_features/half.hpp b/dsppp/Include/dsppp/num_features/half.hpp new file mode 100644 index 00000000..ad5ccafe --- /dev/null +++ b/dsppp/Include/dsppp/num_features/half.hpp @@ -0,0 +1,135 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#ifdef DOXYGEN +#define ARM_FLOAT16_SUPPORTED +#endif + +/** \addtogroup GenericNumber + * \ingroup NUMBER + * @{ + * \addtogroup GenericHalfNumber Half + * \ingroup GenericNumber + * @{ + */ + +#if defined(ARM_FLOAT16_SUPPORTED) + +/** + * @brief Feature of float16 datatype + */ +template<> +struct number_traits +{ + //! It is a float number + static constexpr bool is_float = true; + //! It is not a fixed point number + static constexpr bool is_fixed = false; + //! Accumulator datatype + typedef float16_t accumulator; + + /** + * @brief One value + * + * @return One value in f16 + */ + static constexpr float16_t one() {return ((float16_t)1.0f);}; + + //! Compute datatype + typedef _Float16 compute_type; +}; + + +#if !defined(ARM_MATH_MVE_FLOAT16) +/** + * @brief float16 vector descrition when no vector architecture + */ +template<> +struct vector_traits { + //! Float16 datatype + typedef float16_t type; + //! Float16 storage type + typedef float16_t storage_type; + + // No vector type but must still be defined + //! Dummy type when no vector instruction is supported + typedef bool vector; + //! Dummy type when no vector instruction is supported + typedef bool temp_accumulator; + //! Dummy type when no vector instruction is supported + typedef uint32_t predicate_t; + + //! No vector instruction + static constexpr bool has_vector = false; + //! Is float + static constexpr bool is_float = true; + //! 
Not fixed point + static constexpr bool is_fixed = false; + //! Has predicated loop + static constexpr bool has_predicate = false; +}; +#endif + +/** + * Inner implementation of generic intrinsics + * \ingroup GenericNumber + */ +namespace inner { + /** + * @brief Convert from accumulator datatype + * + * @param[in] a Value + * + * @return Converted from accumulator datatype + */ + __STATIC_FORCEINLINE float16_t from_accumulator(const float16_t a) + { + return(a); + }; + +/** + * @brief Multiply and accumulate + * + * @param[in] acc Accumulator + * @param[in] a First operand + * @param[in] b Second operand + * + * @return acc + a*b + */ + __STATIC_FORCEINLINE float16_t mac(const float16_t acc,const float16_t a,const float16_t b) + { + return((_Float16)acc+(_Float16)a*(_Float16)b); + }; + + +/** + * @brief Accumulate + * + * @param a Accumulator + * @param[in] b Value to accumulate + */ +__STATIC_FORCEINLINE void accumulate(float16_t &a,const float16_t &b) +{ + a += (_Float16)b; +} + +/** + * @brief Multiply + * + * @param a First operand + * @param[in] b Second operand + * + * @return a*b + */ +__STATIC_FORCEINLINE float16_t mult(float16_t &a,const float16_t &b) +{ + return((_Float16)a*(_Float16)b); +} + +} + +#endif + +/*! @} */ +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/num_features/q15.hpp b/dsppp/Include/dsppp/num_features/q15.hpp new file mode 100644 index 00000000..faf58859 --- /dev/null +++ b/dsppp/Include/dsppp/num_features/q15.hpp @@ -0,0 +1,119 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup GenericNumber + * \ingroup NUMBER + * @{ + * \addtogroup GenericQ15Number Q15 + * \ingroup GenericNumber + * @{ + */ + +/** + * @brief Q15 features + */ +template<> +struct number_traits +{ + //! Is not float + static constexpr bool is_float = false; + //! Is fixed point + static constexpr bool is_fixed = true; + //! 
Accumulator datatype + typedef Q<33,30> accumulator; + /** + * @brief One value + * + * @return One value in Q15 + */ + static constexpr Q15 one() {return Q15::one();}; + //! Compute type + typedef Q15 compute_type; +}; + +/** + * @brief Vector features for Q15 when no vector architecture + * + * @tparam arch Current architecture + */ +template +struct vector_traits::value && + !std::is_base_of::value && + !std::is_base_of::value>::type> { + //! Compute type + typedef Q15 type; + + //! Storage datatype (int16_t) + typedef type::value_type storage_type; + + // No vector type but must still be defined + //! Dummy type when no vector instructions + typedef bool vector; + //! Dummy type when no vector instructions + typedef bool temp_accumulator; + //! Dummy type when no vector instructions + typedef uint32_t predicate_t; + + + //! Has no vector instructions + static constexpr bool has_vector = false; + //! Is not float + static constexpr bool is_float = false; + //! Is fixed point + static constexpr bool is_fixed = true; + //! 
Has no predicated loop + static constexpr bool has_predicate = false; + +}; + +/** + * Inner implementation of generic intrinsics + * \ingroup GenericNumber + */ +namespace inner { +#if defined(ARM_MATH_MVEI) + /** + * @brief Convert from accumulator type + * + * @param[in] a The accumulator value + * + * @return The converted value (with saturation) + */ + __STATIC_FORCEINLINE Q15 from_accumulator(const Q<33,30> a) + { + //return(saturate(toFrac<15>(a))); + return(Q15((sqrshrl_sat48(a.v, -(32-15)) >> 32) & 0xffffffff)); + }; +#else + /** + * @brief Convert from accumulator type + * + * @param[in] a The accumulator value + * + * @return The converted value (with saturation) + */ + __STATIC_FORCEINLINE Q15 from_accumulator(const Q<33,30> a) + { + return(saturate(toFrac<15>(a))); + }; +#endif + + /** + * @brief Multiply and accumulate + * + * @param[in] acc Accumulator + * @param[in] a First operand + * @param[in] b Second operand + * + * @return acc + a*b + */ + __STATIC_FORCEINLINE Q<33,30> mac(const Q<33,30> acc,const Q15 a,const Q15 b) + { + return(accumulate(acc , mult(a,b))); + }; +} + +/*! @} */ +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/num_features/q31.hpp b/dsppp/Include/dsppp/num_features/q31.hpp new file mode 100644 index 00000000..9df17886 --- /dev/null +++ b/dsppp/Include/dsppp/num_features/q31.hpp @@ -0,0 +1,119 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup GenericNumber + * \ingroup NUMBER + * @{ + * \addtogroup GenericQ31Number Q31 + * \ingroup GenericNumber + * @{ + */ + +/** + * @brief Features for Q31 + */ +template<> +struct number_traits +{ + //! Is not a float + static constexpr bool is_float = false; + //! Is fixed point + static constexpr bool is_fixed = true; + //! Accumulator datatype + typedef Q<15,48> accumulator; + /** + * @brief One value + * + * @return One value + */ + static constexpr Q31 one() {return Q31::one();}; + + //! 
Compute type + typedef Q31 compute_type; +}; + +/** + * @brief Vector features for Q31 when no vector instructions + * + * @tparam arch Current architecture + */ +template +struct vector_traits::value && + !std::is_base_of::value>::type> { + //! Datatype + typedef Q31 type; + + //! Storage tpe (int32_t) + typedef type::value_type storage_type; + + // No vector type but must still be defined + //! Dummy type when no vector instructions are supported + typedef bool vector; + //! Dummy type when no vector instructions are supported + typedef bool temp_accumulator; + //! Dummy type when no vector instructions are supported + typedef uint32_t predicate_t; + + + //! No vector instruction + static constexpr bool has_vector = false; + //! Is not float + static constexpr bool is_float = false; + //! Is fixed + static constexpr bool is_fixed = true; + //! No predicated loop + static constexpr bool has_predicate = false; +}; + +/** + * Inner implementation of generic intrinsics + * \ingroup GenericNumber + */ +namespace inner { +#if defined(ARM_MATH_MVEI) + /** + * @brief Convert from accumulator (with no saturation) + * + * @param[in] a Accumulator value + * + * @return Converted value + */ + __STATIC_FORCEINLINE Q31 from_accumulator(const Q<15,48> a) + { + return(Q31(asrl(a.v, 17))); + }; +#else + /** + * @brief Convert from accumulator (with no saturation) + * + * @param[in] a Accumulator value + * + * @return Converted value + */ + __STATIC_FORCEINLINE Q31 from_accumulator(const Q<15,48> a) + { + return(Q31(a.v >> 17)); + }; +#endif + + +/** + * @brief Multiply and accumulate + * + * @param[in] acc Accumulator + * @param[in] a First operand + * @param[in] b Second operand + * + * @return acc + a*b + */ +__STATIC_FORCEINLINE Q<15,48> mac(const Q<15,48> acc,const Q31 a,const Q31 b) +{ + return(accumulate(acc , toFrac<48>(mult(a,b)))); +}; + + } + +/*! @} */ +/*! 
@} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/num_features/q7.hpp b/dsppp/Include/dsppp/num_features/q7.hpp new file mode 100644 index 00000000..674a5fe7 --- /dev/null +++ b/dsppp/Include/dsppp/num_features/q7.hpp @@ -0,0 +1,108 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +/** \addtogroup GenericNumber + * \ingroup NUMBER + * @{ + * \addtogroup GenericQ7Number Q7 + * \ingroup GenericNumber + * @{ + */ + +/** + * @brief Q7 features + */ +template<> +struct number_traits +{ + //! Is not float + static constexpr bool is_float = false; + + //! Is fixed point + static constexpr bool is_fixed = true; + + //! Accumulator datatype + typedef Q<17,14> accumulator; + + /** + * @brief One value + * + * @return One value in Q7 + */ + static constexpr Q7 one() {return Q7::one();}; + + + //! Compute type + typedef Q7 compute_type; +}; + +/** + * @brief Vector descrition when no vector architecture + * + * @tparam arch Current architecture + */ +template +struct vector_traits::value && + !std::is_base_of::value && + !std::is_base_of::value>::type> { + //! Current datatype + typedef Q7 type; + + //! Storage datatype (int8_t) + typedef type::value_type storage_type; + + // No vector type but must still be defined + //! Dummy datatype when no vector instructions + typedef bool vector; + //! Dummy datatype when no vector instructions + typedef bool temp_accumulator; + //! Dummy datatype when no vector instructions + typedef uint32_t predicate_t; + + + //! No vector instructions + static constexpr bool has_vector = false; + //! Is not float + static constexpr bool is_float = false; + //! Is fixed point + static constexpr bool is_fixed = true; + //! 
No predicated loop + static constexpr bool has_predicate = false; +}; + +/** + * Inner implementation of generic intrinsics + * \ingroup GenericNumber + */ +namespace inner { + /** + * @brief Convert from accumulator with saturation + * + * @param[in] a Accumulator value + * + * @return Q7 value + */ + __STATIC_FORCEINLINE Q7 from_accumulator(const Q<17,14> a) + { + return(Q7(__SSAT(a.v >> 7, 8))); + }; + +/** + * @brief Multiply and accumulate + * + * @param[in] acc Accumulator + * @param[in] a First operand + * @param[in] b Second operand + * + * @return acc + a*b + */ + __STATIC_FORCEINLINE Q<17,14> mac(const Q<17,14> acc,const Q7 a,const Q7 b) + { + return(accumulate(acc , mult(a,b))); + }; +} + +/*! @} */ +/*! @} */ \ No newline at end of file diff --git a/dsppp/Include/dsppp/number.hpp b/dsppp/Include/dsppp/number.hpp new file mode 100644 index 00000000..2cfd84a8 --- /dev/null +++ b/dsppp/Include/dsppp/number.hpp @@ -0,0 +1,190 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#include "fixed_point.hpp" +#include + +#include "arm_math_types.h" + +#if defined(ARM_FLOAT16_SUPPORTED) +#include "arm_math_types_f16.h" +#endif + +#if defined(ARM_MATH_DSP) +#include "DSP/memory.hpp" +#endif + +namespace arm_cmsis_dsp { + +/** \addtogroup NUMBER Number datatypes + * \ingroup DSPPP + * Number datatypes expressing different properties of the numbers + * according to the architecture. + * + * Those definitions are used to write architecture independents + * algorithms. 
+ * @{ + */ + +constexpr uint32_t maskFromShift(const uint32_t shift) +{ + return ((1<>1)); +} + +/** @brief Properties of a scalar datatype + * @tparam T datatype + * + * Needs to contain two static bool : is_float and is_fixed + * + * Needs to contain a static function `one` returning the value + * 1 for this datatype (used to write some datatype generic + * algorithms) + */ +template +struct number_traits; + + +/* + +When vector is true we have a vector datatype +A temporary accumulator datatype and an accumulator datatype. +For most types the temporary and accumulator are the same. +For float, vector instruction mac is doing a mac per lane. +So temporay is a vector and the final accumulator is a float. + +*/ + +/** @brief Properties of a vector datatype linked to a scalar datatype + * @tparam T Type of the scalar + * @tparam arch Architecture. It is defined by the + * architecture selection code and should never be + * set by the user. + */ +template +struct vector_traits { + typedef T type; //!< Scalar datatype + typedef T storage_type; //!< Storage type (for instance for Q15 scalar the storage is int16_t) + static constexpr bool has_vector = false; //!< True if scalar type has a related vector type + static constexpr bool is_float = false; //!< True if scalar type is a float (half, float or double) + static constexpr bool is_fixed = false; //!< True if scalar type is fixed point +}; + +/** @brief Scalar properties of fixed point datatype + * @tparam M Mantissa bits (not including sign bit) + * @tparam F Fractional bits + * @tparam S Signed or unsigned + * @tparam T Storage datatype + */ +template +struct number_traits> +{ + static constexpr bool is_float = false; //!< False because scalar is not a float datatype (half, float, double) + static constexpr bool is_fixed = true; //!< True because datatype is a fixed point arithmetic one + + /** @brief Return 1 for this datatype + * + * Used for writing datatype generic algorithms + */ + static constexpr Q one() 
{return Q::one();}; +}; + + +namespace inner { + +/** @brief Predicate (only defined for vector architectures) + * @tparam T scalar data type + * @param v Number of loops + * @return Predicate for the given architecture + */ +template +struct vctpq { +static typename vector_traits::predicate_t mk(uint32_t v); +}; + +}; + + +/* + +vconst +vconst_tail +vadd +vsub +vmul +vacc + + +vload1 +vstore1 + +// When predicate +vctpq +vload1_z +vstore1_z + +// When predicated loop +vadd_x +vsub_x +vmul_x +vmacc_p + + +*/ + + + +// Common to all architectures +#include "num_features/double.hpp" +#include "num_features/float.hpp" +#include "num_features/half.hpp" +#include "num_features/q31.hpp" +#include "num_features/q15.hpp" +#include "num_features/q7.hpp" + +// Specific for some architecture +//#include +#include "DSP/num_features.hpp" +#include "Helium/num_features.hpp" +//#include + + +#include "num_features/group.hpp" + +/* + +If there is the need to tune the intrinsics depending on the +Helium variant of the architecture, somehting like that could be used. +In practice, selection is done at level of of algorithms more than +instructions where it may be simple to just use a #if to use the +right intrinsics when it is available. + +*/ +#if 0 +template +__STATIC_FORCEINLINE mve_pred16_t _vctpq(uint32_t v,Helium * = nullptr); + +template<> +__STATIC_FORCEINLINE mve_pred16_t _vctpq(uint32_t v,Helium *) +{ + return(vctp32q(v)); +}; + +template +__STATIC_FORCEINLINE mve_pred16_t vctpq(uint32_t v) +{ + return(_vctpq(v,CURRENT_ARCH)); +} + +#endif + +/*! 
@} */ + +} // cmsis-dsp namespace \ No newline at end of file diff --git a/dsppp/Include/dsppp/unroll.hpp b/dsppp/Include/dsppp/unroll.hpp new file mode 100644 index 00000000..b6e6693f --- /dev/null +++ b/dsppp/Include/dsppp/unroll.hpp @@ -0,0 +1,247 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#include +#include + +#include "common.hpp" +#include "arch.hpp" +#include +#include "number.hpp" +#include "forward.hpp" +#include "fusion.hpp" +#include "matrix.hpp" + +namespace arm_cmsis_dsp { + +/** \addtogroup UNROLLING Unrolling + * \ingroup DSPPP + * @{ + */ + +template +struct Merged +{ + + using ScalarResult = std::tuple>::Scalar...>; + using TypeOfElement = typename std::tuple_element<0,ScalarResult>::type; + + constexpr explicit Merged(const E& ... values) : vals { values ...} { } + + constexpr Merged(Merged&& other) = default; + constexpr Merged(const Merged& other) = default; + constexpr Merged& operator=(const Merged& other) = delete; + constexpr Merged& operator=(Merged&& other) = delete; + ~Merged() = default; + + constexpr vector_length_t length() const noexcept {return std::get<0>(vals).length();}; + + template + constexpr ScalarResult val_impl(const int i, const std::index_sequence) const noexcept + { + return std::tuple(std::get(vals)[i]...); + } + + constexpr ScalarResult operator[](const int i) noexcept{ + return val_impl(i,std::make_index_sequence()); + } + + constexpr ScalarResult const operator[](const int i) const noexcept{ + return val_impl(i,std::make_index_sequence()); + } + +#if defined(HAS_VECTOR) + + using Vector = std::tuple>::Scalar>::vector...>; + + template + void vector_store_impl(const index_t i,const Vector &val, const std::index_sequence) const noexcept + { + (inner::vstore1<1>((std::get(vals).ptr(i)),std::get(val)),...); + } + + void vector_store(const index_t i,const Vector &val) const noexcept + { + vector_store_impl(i,val,std::make_index_sequence()); + } + +#if defined(HAS_PREDICATED_LOOP) + template + void 
vector_store_tail_impl(const index_t i,const vector_length_t remaining,const Vector &val, const std::index_sequence) const noexcept + { + (inner::vstore1_z<1>((std::get(vals).ptr(i)),std::get(val),remaining,inner::vctpq::mk(remaining)),...); + } + + + void vector_store_tail(const index_t i,const vector_length_t remaining,const Vector &val) const noexcept + { + vector_store_tail_impl(i,remaining,val,std::make_index_sequence()); + } +#endif + + + template + Vector vector_op_impl(const int i, const std::index_sequence) const noexcept + { + return std::make_tuple(std::get(vals).vector_op(i)...); + } + + Vector vector_op(const index_t i) const noexcept + { + return(vector_op_impl(i,std::make_index_sequence())); + } + +#if defined(HAS_PREDICATED_LOOP) + template + Vector vector_op_tail_impl(const index_t i,const vector_length_t remaining, const std::index_sequence) const noexcept + { + return std::make_tuple(std::get(vals).vector_op_tail(i,remaining)...); + } + + Vector vector_op_tail(const index_t i,const vector_length_t remaining) const noexcept + { + return(vector_op_tail_impl(i,remaining,std::make_index_sequence())); + } +#endif +#endif + + template + Merged& operator=(const Merged& other) noexcept + { + eval(*this,other,std::get<0>(vals).length(),CURRENT_ARCH); + return(*this); + } + + const std::tuple vals; +}; + +template +static inline Merged<_Tp&...> +results(_Tp&... __t) noexcept {return Merged<_Tp&...>(__t...);} + + +template +struct traits> +{ + typedef std::tuple>::Scalar...> Scalar; + +#if defined(HAS_VECTOR) + typedef std::tuple>::Scalar>::vector...> Vector; +#endif +}; + +template +struct IsVector> +{ + constexpr static bool value = true; +}; + +template +struct IsDynamic> +{ + constexpr static bool value = (... && IsDynamic>::value); +}; + +template +struct ElementType> +{ + typedef std::tuple>::type...> type; +}; + +constexpr vector_length_t max_length(const vector_length_t a,const vector_length_t b) noexcept +{ + return((a>b) ? 
a : b); +}; + + +template +constexpr vector_length_t max_vec_length(F a,N ...b) noexcept +{ + if constexpr (sizeof...(b) == 0) + { + return(a); + } + else + { + return max_length(a,max_vec_length(b...)); + } +}; + + +template +struct StaticLength> +{ + constexpr static vector_length_t value = max_vec_length(StaticLength>::value...); +}; + + + template + auto unroll_impl(const F& func,std::index_sequence) noexcept + { + return Merged{func(Ns)...}; + }; + + template + auto unroll(const F& func) noexcept + { + return unroll_impl(func,std::make_index_sequence()); + }; + + template + constexpr static const E& constres(const E& r,const std::size_t) noexcept + { + return(r); + } + + template + auto replicate_impl(const E& expr,std::index_sequence) noexcept + { + return Merged{constres(expr,Ns)...}; + }; + + template + auto replicate(const E& expr) noexcept + { + return replicate_impl(expr,std::make_index_sequence()); + }; + + /* + + We don't want to replicate the Vector but only a reference + to the vector. So it is packed into an expr + + */ + template typename A> + auto replicate(const Vector& e) noexcept + { + //return replicate_impl(expr(e),std::make_index_sequence()); + return replicate_impl(VectorView(e),std::make_index_sequence()); + }; + + template + auto results_impl(std::array &a,std::index_sequence) noexcept + { + return std::tie(a[Ns]...); + }; + + template + auto results(std::array &a) noexcept + { + return results_impl(a,std::make_index_sequence()); + }; + + template + auto result_impl_func(const F& func,std::index_sequence) noexcept + { + return std::tie(*func(Ns)...); + }; + + template + auto results(const F& func) noexcept + { + return result_impl_func(func,std::make_index_sequence()); + }; + +/*! 
@} */ +} diff --git a/dsppp/Include/dsppp/vec.hpp b/dsppp/Include/dsppp/vec.hpp new file mode 100644 index 00000000..ec729dea --- /dev/null +++ b/dsppp/Include/dsppp/vec.hpp @@ -0,0 +1,519 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#include +#include +#include +#include +#include "common.hpp" +#include "arch.hpp" +#include +#include "number.hpp" +#include "forward.hpp" +#include "fusion.hpp" +#include "unroll.hpp" +#include "algorithms.hpp" +#include "vector_impl.hpp" +#include "vector_view.hpp" + +namespace arm_cmsis_dsp { + +/** \addtogroup VECTOR Vectors + * \ingroup DSPPP + * @{ + */ + +template +struct VecRef; + +template +struct VecRef> +{ + typedef VectorView type; + static type ref(const Vector_Base&a){ + return(type(a)); + }; +}; + +template +struct VecRef> +{ + typedef VectorView type; + static type ref(const VectorView&a){ + return(a); + }; +}; + + +template typename A> +struct VecRef,(L<0)> +{ + + typedef VectorView type; + static VectorView ref(const Vector&a,typename std::enable_if<(L<0)>::type* = nullptr){ + return(VectorView(a)); + }; + +}; + +template typename A> +struct VecRef,(L>0)> +{ + typedef const Vector& type; + static const Vector& ref(const Vector&a,typename std::enable_if<(L>0)>::type* = nullptr){ + return(a); + }; +}; + + + +template +struct VecRef<_Binary> +{ + typedef _Binary type; + static type ref(const _Binary&a){ + return(a); + }; +}; + +template +struct VecRef<_Unary> +{ + typedef _Unary type; + static type ref(const _Unary&a){ + return(a); + }; +}; + +template +struct VecRef<_Expr> +{ + typedef Derived type; + static type ref(const _Expr&a){ + return(a.derived()); + }; +}; + +template<> +struct VecRef +{ + typedef double type; + static type ref(const double a){ + return(a); + }; +}; + +template<> +struct VecRef +{ + typedef float type; + static type ref(const float a){ + return(a); + }; +}; + +#if defined(ARM_FLOAT16_SUPPORTED) +template<> +struct VecRef +{ + typedef float16_t type; + static type ref(const float16_t a){ + 
return(a); + }; +}; +#endif + +template<> +struct VecRef +{ + typedef Q7 type; + static type ref(const Q7 a){ + return(a); + }; +}; + +template<> +struct VecRef +{ + typedef Q15 type; + static type ref(const Q15 a){ + return(a); + }; +}; + +template<> +struct VecRef +{ + typedef Q31 type; + static type ref(const Q31 a){ + return(a); + }; +}; + + +template +struct traits> +{ + typedef T Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits::vector Vector; +#endif +}; + + +template typename Allocator> +struct traits> +{ + typedef P Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits

::vector Vector; +#endif +}; + + +template typename Allocator> +struct traits&> +{ + typedef P Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits

::vector Vector; +#endif +}; + + + +template +struct StaticStride +{ + constexpr static std::size_t value = 1; +}; + +template +struct StaticStride> +{ + constexpr static std::size_t value = S; +}; + + +template typename Allocator> +struct IsVector> +{ + constexpr static bool value = true; +}; + +template typename Allocator> +struct IsVector&> +{ + constexpr static bool value = true; +}; + +template +struct IsVector&> +{ + constexpr static bool value = true; +}; + +template +struct IsVector> +{ + constexpr static bool value = true; +}; + +template typename Allocator> +struct ElementType> +{ + typedef P type; +}; + +template typename Allocator> +struct ElementType&> +{ + typedef P type; +}; + + +template +struct ElementType> +{ + typedef P type; +}; + +template +struct ElementType&> +{ + typedef P type; +}; + +template +struct IsVector> +{ + constexpr static bool value = true; +}; + + +template typename Allocator> +struct StaticLength> +{ + constexpr static vector_length_t value = (L<0) ? 0 : L; +}; + +template typename Allocator> +struct StaticLength&> +{ + constexpr static vector_length_t value = (L<0) ? 
0 : L; +}; + + +template +struct ElementType> +{ + typedef T type; +}; + +template +struct ElementType&> +{ + typedef T type; +}; + + +template typename Allocator> +struct IsDynamic> +{ + constexpr static bool value = (L<0); +}; + +template typename Allocator> +struct IsDynamic&> +{ + constexpr static bool value = (L<0); +}; + +template +struct IsDynamic> +{ + constexpr static bool value = true; +}; + + + + +// Assume one at least is static +template +using StaticType=typename std::conditional::value,VB,VA>::type; + + + +/** + * @brief Addition operator for expressions + * + * @tparam LHS Left hand side datatype + * @tparam RHS Right hand side datatype + * @param a Left hand side expression tree + * @param b Right hand side expression tree + * @return Expression representing the add + * + * vector + vector (including matrix) + */ +template() || + !is_scalar()) && + SameElementType::value && + same_static_length(),bool>::type = true> +inline auto operator+(const LHS &a,const RHS &b) +{ + using Scalar = typename traits::Scalar; + using VecLHS = VecRef; + using VecRHS = VecRef; + + return(_Binary>(VecLHS::ref(a),VecRHS::ref(b),_AddOp())); +}; + + +/** + * @brief + operator for expressions + * + * @tparam LHS Left hand side datatype + * @param a Left hand side expression tree + * @return Expression representing + vector + * + * +vector (including matrix) + */ +template(),bool>::type = true> +inline auto operator+(const LHS &a) +{ + using Scalar = typename traits::Scalar; + using VecLHS = VecRef; + + return(_Unary>(VecLHS::ref(a),_NoOp())); +}; + + +/* + +VectorView = VectorView must be a cheap copy of reference only. +So when we want to copy a VectorView onto another we need to +write +VectorView = expr(VectorView) or copy + +we cannot rely on the copy or move constructors. 
+ +*/ + +/** + * @brief Identity operator for expression + * + * @tparam LHS Left hand side datatype + * @param a Left hand side expression tree + * @return Expression representing the identity + * + * Used to consider a vector view as an expression and force the copy + * of this vector view when assigned to another vector entity. + * + */ +template(),bool>::type = true> +inline auto expr(const LHS &a) +{ + using Scalar = typename traits::Scalar; + using VecLHS = VecRef; + return(_Unary>(VecLHS::ref(a),_NoOp())); +}; + +/** + * @brief Identity operator for expression + * + * @tparam LHS Left hand side datatype + * @param a Left hand side expression tree + * @return Expression representing the identity + * + * Used to consider a vector view as an expression and force the copy + * of this vector view when assigned to another vector entity. + * + */ +template(),bool>::type = true> +inline auto copy(const LHS &a) +{ + using Scalar = typename traits::Scalar; + using VecLHS = VecRef; + return(_Unary>(VecLHS::ref(a),_NoOp())); +}; + + +/** + * @brief Subtraction operator for expressions + * + * @tparam LHS Left hand side datatype + * @tparam RHS Right hand side datatype + * @param a Left hand side expression tree + * @param b Right hand side expression tree + * @return Expression representing the add + * + * vector - vector (including matrix) + */ +template() || + !is_scalar()) && + SameElementType::value && + same_static_length(),bool>::type = true> +inline auto operator-(const LHS &a,const RHS &b) +{ + using Scalar = typename traits::Scalar; + using VecLHS = VecRef; + using VecRHS = VecRef; + + return(_Binary>( + VecLHS::ref(a),VecRHS::ref(b),_SubOp())); +}; + + +/** + * @brief - operator for expressions + * + * @tparam LHS Left hand side datatype + * @param a Left hand side expression tree + * @return Expression representing the - vector + * + * -vector (including matrix) + */ +template(),bool>::type = true> +inline auto operator-(const LHS &a) +{ + using Scalar = 
typename traits::Scalar; + using VecLHS = VecRef; + + return(_Unary>(VecLHS::ref(a),_NegOp())); +}; + + +/** + * @brief Element wise multiplication operator for expressions + * + * @tparam LHS Left hand side datatype + * @tparam RHS Right hand side datatype + * @param a Left hand side expression tree + * @param b Right hand side expression tree + * @return Expression representing the * + * + * elementwise vector * vector (including matrix) + */ +template() || + !is_scalar()) && + SameElementType::value && + same_static_length(),bool>::type = true> +inline auto operator*(const LHS &a,const RHS &b) +{ + using Scalar = typename traits::Scalar; + using VecLHS = VecRef; + using VecRHS = VecRef; + + return(_Binary>( + VecLHS::ref(a),VecRHS::ref(b),_MulOp())); +}; + + + +#if 0 +template::value && + IsVector::value && + SameElementType::value && + (same_static_length(StaticLength::value , StaticLength::value)),bool>::type = true> +inline _Expr operator+(const VA &a, + const VB &b) +{ + + return(_Add(a,b)); +}; +#endif + +/* + +Core algorithms that cannot be expressed only with high level +abstractions and need intrinsincs. + +*/ +#include "Helium/matrix_multiply.hpp" +#include "DSP/matrix_multiply.hpp" +#include "Scalar/matrix_multiply.hpp" + +/*! @} */ + +} diff --git a/dsppp/Include/dsppp/vector_impl.hpp b/dsppp/Include/dsppp/vector_impl.hpp new file mode 100644 index 00000000..a01509cb --- /dev/null +++ b/dsppp/Include/dsppp/vector_impl.hpp @@ -0,0 +1,943 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#include +#include +#include +#include +#include "common.hpp" +#include "arch.hpp" +#include +#include "number.hpp" +#include "forward.hpp" +#include "fusion.hpp" +#include "unroll.hpp" +#include "algorithms.hpp" + +namespace arm_cmsis_dsp { + +/** \addtogroup VECTOR + * @{ + */ + +/* + +Generic evaluators. 
+ +*/ +#include "Scalar/basic.hpp" +#include "DSP/basic.hpp" +#include "Helium/basic.hpp" +#include "Neon/basic.hpp" + +/** @brief Storage for a vector + * @tparam P Type of the scalar + */ +template +struct Vector_Base { + + //! Type of vector elements + typedef P element_type; + + + /** + * @brief Vector dimension + * @return Vector dimension + * + */ + vector_length_t length() const {return(length_);}; + + /** + * @brief Pointer to storage buffer + * @return Pointer to storage + */ + P* ptr() const {return(values_);} + + /** + * @brief Pointer to storage buffer at index i + * + * @param i Index in buffer + * @return Pointer to storage + * + */ + P* ptr(const index_t i) const {return(&values_[i]);} + + /** + * @brief Pointer to storage buffer + * @return Pointer to constant storage + * + */ + const P* const_ptr() const {return(values_);} + + /** + * @brief Pointer to storage buffer at index i + * + * @param i Index in buffer + * @return Pointer to constant storage + * + */ + const P* const_ptr(const index_t i) const {return(&values_[i]);} + + + + /** + * @brief Iterator begin + * + * @return Pointer to start of buffer + * + */ + P* begin() const {return(values_);} + + /** + * @brief Iterator end + * + * @return Pointer to first element after end of buffer + * + */ + P* end() const {return(values_+length_);} + + /** + * @brief Display the vector content for debug purpose + * @param stream Output stream + * @param other The vector to display + * @return the stream + * + */ + friend std::ostream& operator<< (std::ostream& stream, const Vector_Base

& other) { + constexpr int nb = 10; + int i=0; + for(index_t k=0;k::vector; + + /** + * @brief %Vector store at index i + * + * @tparam T scalar datatype + * @param i index + * @param val Vector value + * + * On an architecture supporting vectors, if the scalar datatype T + * has a corresponding vector datatype, this function stores a vector + * value at index i in this vector datatype + */ + template::has_vector,bool>::type = true> + void vector_store(const index_t i,const Vector val) const + { + inner::vstore1<1>((typename std::remove_cv

::type*)(&values_[i]),val); + } + +#if defined(HAS_PREDICATED_LOOP) + /** + * @brief %Vector store at index i with predicated tail + * + * @param i index + * @param remaining Number of remaining samples in the loop + * @param val Vector value to write at index i with tail predication + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function stores a vector value at index i in this vector datatype + * with predication + */ + void vector_store_tail(const index_t i,const vector_length_t remaining,const Vector val) const + { + inner::vstore1_z<1>((typename std::remove_cv

::type*)(&values_[i]),val,remaining,inner::vctpq

::mk(remaining)); + } + + /** + * @brief %Vector operation at index i with predicated tail + * + * @param i index + * @param remaining Number of remaining samples in the loop + * @return Result of operation + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function execute an operation at index i with predication. + * In the case of a vector, this operation is a load + */ + Vector const vector_op_tail(const index_t i,const vector_length_t remaining) const + { + return(inner::vload1_z<1>((typename std::remove_cv

::type*)(&values_[i]),remaining,inner::vctpq

::mk(remaining))); + } +#endif + + /** + * @brief %Vector operation at index i + * + * @param i index + * @return Result of operation + * + * On an architecture supporting vectors, if the + * scalar datatype T has a corresponding vector datatype, this + * function execute an operation at index i. + * In the case of a vector, this operation is a load + */ + Vector const vector_op(const index_t i) const + { + return(inner::vload1<1>((typename std::remove_cv

::type*)(&values_[i]))); + } + +#endif + + + +protected: + + //Vector_Base():length_(0),values_(nullptr){}; + Vector_Base() = delete; + + explicit Vector_Base(vector_length_t length, char *val): + length_(length), + values_(reinterpret_cast(val)){}; + + explicit Vector_Base(vector_length_t length, char *val,P init_val): + length_(length), + values_(reinterpret_cast(val)){ + _Fill(*this,init_val,length,CURRENT_ARCH); + }; + + + Vector_Base& operator=(const Vector_Base& other) + { + if ((length_ == other.length_) && (this != &other)) + { + _Fill(*this,other,other.length_,CURRENT_ARCH); + //std::memcpy(values_,other.values_,sizeof(P)*length_); + } + return(*this); + } + + // Done in derivated classes since we need + // the allocator destroy + Vector_Base& operator=(Vector_Base&& other) = delete; + + + + + vector_length_t length_; + P* values_; +}; + + +template +struct traits> +{ + typedef T Scalar; +#if defined(HAS_VECTOR) + typedef typename vector_traits::vector Vector; +#endif +}; + +/** @brief Vector template for size knonw at build time + * @tparam P Type of the scalar + * @tparam L Vector length in number of elements. + * Negative if length not known at build time. It is the default value + * @tparam Allocator Memory allocator to use. By default it is the macro `TMP_ALLOC` + */ +template typename Allocator = TMP_ALLOC> +struct Vector:Vector_Base

{ + + + //! Type of vector elements + using element_type = P; + + //! Length of the vector when known at build time. + constexpr static vector_length_t vector_size = sizeof(P)*L; + + /** + * @brief Allocate a buffer for this vector using the memory allocator + * + * @return A new memory buffer + */ + static char* allocate(){return(Allocator::allocate());}; + + /** + * @brief Construct a new vector + * + * The length is known at build time. + * + */ + Vector():Vector_Base

(L,Vector::allocate()){}; + + /** + * @brief Construct a new vector and initialize it + * + * The length is known at build time. + * + * @param init_val Initialization value + */ + explicit Vector(P init_val):Vector_Base

(L,Vector::allocate(),init_val){ + }; + + /** + * @brief Construct a new vector and initialize it with a list + * + * A vector can be initialized like an array using {} syntax + * The length is known at build time. + * + * @param l Initialization list + */ + Vector(const std::initializer_list

&l) + :Vector_Base

(L,Vector::allocate()){ + std::memcpy(Vector_Base

::values_,l.data(),vector_size); + }; + + Vector(Vector&& other) = default; + + Vector(const Vector& other):Vector_Base

(L,Vector::allocate()) + { + eval(*this,+other,(vector_length_t)L,CURRENT_ARCH); + + //std::memcpy(Vector_Base

::values_,other.values_,vector_size); + }; + + /** + * @brief Create a vector from a vector using a different memory allocator + * + * @param other Other vector using a different memory allocator + */ + template typename OtherAllocator> + explicit Vector(const Vector& other):Vector_Base

(L,Vector::allocate()) + { + eval(*this,+other,(vector_length_t)L,CURRENT_ARCH); + }; + + template typename OtherAllocator> + explicit Vector(const Vector& other):Vector_Base

(L,Vector::allocate()) + { + if (other.length() == Vector_Base

::length()) + { + eval(*this,+other,(vector_length_t)L,CURRENT_ARCH); + } + }; + + /** + * @brief Create a vector from a VectorView + * + * @tparam S The stride of the vector view known at build time + * @param other The vector view + */ + template + explicit Vector(const VectorView& other):Vector_Base

(L,Vector::allocate()) + { + eval(*this,+other,(vector_length_t)L,CURRENT_ARCH); + }; + + + + /** + * @brief Create a vector from an expression + * + * @tparam Derived The type representing the abstract syntax tree + * @param other The expression + * + * It is the mechanism allowing to evaluate an expression + * and merge all of the operators the of the expression in the + * same loop + */ + template + Vector(const _Expr& other):Vector_Base

(L,Vector::allocate()) + { + eval(*this,other.derived(),(vector_length_t)L,CURRENT_ARCH); + }; + + + Vector& operator=(const Vector& other) = default; + + Vector& operator=(Vector&& other) + { + if (this != &other) + { + if (Vector_Base

::values_!=nullptr) + { + Allocator::destroy(reinterpret_cast(Vector_Base

::values_)); + } + + Vector_Base

::length_= other.length_; + Vector_Base

::values_ = other.values_; + other.values_=nullptr; + other.length_ = 0; + } + + return(*this); + } + + /** + * @brief Copy result of an expression to a vector content + * + * @tparam Derived The type representing the abstract syntax tree + * @param other The expression + * @return A reference to the vector + * + * It is the mechanism allowing to evaluate an expression + * and merge all of the operators the of the expression in the + * same loop + */ + template + Vector& operator=(const _Expr&other) + { + eval(*this,other.derived(),(vector_length_t)L,CURRENT_ARCH); + return(*this); + } + + + /** + * @brief Fill a vector with a constant + * + * @tparam T The constant datatype + * @param other The const + * @return A reference to the vector + * + */ + template(),bool>::type = true> + Vector& operator=(const T other) + { + _Fill(*this,other,L,CURRENT_ARCH); + return(*this); + } + + + + + /** + * @brief Elementwise add the result of an expression to a vector + * + * @tparam Derived The type representing the abstract syntax tree of the expression + * @param other The expression + * @return A reference to the vector + * + */ + template + Vector& operator +=(const _Expr& other) + { + eval(*this,*this + other.derived(),(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise add vector to another vector + * + * @param other The vector + * @return A reference to the vector + * + */ + Vector& operator +=(const Vector& other) + { + eval(*this,*this + other,(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + + /** + * @brief Elementwise add a constant to a vector + * + * @tparam P The constant datatype + * @param other The expression + * @return A reference to the vector + * + */ + Vector& operator +=(const P other) + { + eval(*this,*this + other,(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise subtract the result of an expression from a vector + * + * @tparam Derived The type representing the abstract 
syntax tree of the expression + * @param other The expression + * @return A reference to the vector + * + */ + template + Vector& operator -=(const _Expr& other) + { + eval(*this,*this - other.derived(),(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + + /** + * @brief Elementwise subtract a vector from a vector + * + * @param other The other vector + * @return A reference to the vector + * + */ + Vector& operator -=(const Vector& other) + { + eval(*this,*this - other,(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise subtract a constant from a vector + * + * @tparam P Datatype of the constant + * @param other The constant + * @return A reference to the vector + * + */ + Vector& operator -=(const P other) + { + eval(*this,*this - other,(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + + /** + * @brief Elementwise multiply the result of an expression with a vector + * + * @tparam Derived The type representing the abstract syntax tree of the expression + * @param other The expression + * @return A reference to the vector + * + */ + template + Vector& operator *=(const _Expr& other) + { + eval(*this,*this * other.derived(),(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise multiply a vector with a vector + * + * @param other The othr vector + * @return A reference to the vector + * + */ + Vector& operator *=(const Vector& other) + { + eval(*this,*this * other,(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise multiply a constant with a vector + * + * @tparam Derived Constant datatype + * @param other The constant + * @return A reference to the vector + * + */ + Vector& operator *=(const P other) + { + eval(*this,*this * other,(vector_length_t)L,CURRENT_ARCH); + return(*this); + }; + + + + /** + * @brief Create a vector view + * + * @tparam S Stride known at build time + * @param start Start index for the vector view + * @param stop Stop index 
for the vector view (first element after the vector view) + * Default is length of the vector if known at build time. + * @return A reference to the vector view + * + */ + template + VectorView sub(const index_t start=0,const index_t stop=L) + { + return(VectorView(*this,start,stop)); + } + + template + const VectorView sub(const index_t start=0,const index_t stop=L) const + { + return(VectorView(*this,start,stop)); + } + + + virtual ~Vector() { + if (Vector_Base

::values_!=nullptr) + { + Allocator::destroy(reinterpret_cast(Vector_Base

::values_)); + } + } + + + + +}; + + +/** @brief Vector template for dynamic vector (size known at runtime) + * @tparam P Type of the scalar + * @tparam Allocator Memory allocator to use. By default it is the macro `TMP_ALLOC` + */ +template typename Allocator> +struct Vector:Vector_Base

{ + + /** + * @brief Allocate a buffer for this vector using the memory allocator + * + * @param length Vector dimension + * @return A new memory buffer + */ + static char* allocate(vector_length_t length){return(Allocator::allocate(sizeof(P)*length));}; + + Vector() = delete; + + /** + * @brief Create a new vector + * + * @param length Vector dimension + * @param init_val Initialization value + */ + explicit Vector(vector_length_t length,P init_val): + Vector_Base

(length,Vector::allocate(length),init_val){}; + + /** + * @brief Create a new vector + * + * @param length Vector dimension + */ + explicit Vector(vector_length_t length): + Vector_Base

(length,Vector::allocate(length)){}; + + + /** + * @brief Create a new vector + * + * @param l Initializer list + * A vector can be initialized like an array using {} syntax + */ + explicit Vector(const std::initializer_list

&l) + :Vector_Base

(l.size(),Vector::allocate(l.size())){ + std::memcpy(Vector_Base

::values_,l.data(),sizeof(P)*l.size()); + }; + + /** + * @brief Create a new vector from a vector using a different memory allocator + * + * @tparam K Dimension of other vector (statically known or dynamic) + * @param other The vector to copy + */ + template typename OtherAllocator> + explicit Vector(const Vector& other):Vector_Base

(other.length(),Vector::allocate(other.length())) + { + eval(*this,+other,Vector_Base

::length(),CURRENT_ARCH); + }; + + + /** + * @brief Create a new vector from a vector of same type + * + * @param other The vector to copy + */ + Vector(const Vector& other):Vector_Base

(other.length(),Vector::allocate(other.length())) + { + eval(*this,+other,Vector_Base

::length(),CURRENT_ARCH); + + //std::memcpy(Vector_Base

::values_,other.values_,vector_size); + }; + + /** + * @brief Create a new vector from a vector view + * + * @tparam S Stride of vector view known at build time + * @param other The vector to copy + */ + template + explicit Vector(const VectorView& other):Vector_Base

(other.length(),Vector::allocate(other.length())) + { + eval(*this,+other,Vector_Base

::length(),CURRENT_ARCH); + }; + + /** + * @brief Create a new vector from an expressipn + * + * @tparam Derived Type representing the abstract syntax tree of the expression + * @param other The expression to evaluate + */ + template + Vector(const _Expr& other):Vector_Base

(other.length(),Vector::allocate(other.length())) + { + eval(*this,other.derived(),Vector_Base

::length(),CURRENT_ARCH); + }; + + Vector(Vector&& other) = default; + + + + Vector& operator=(const Vector& other) = default; + + Vector& operator=(Vector&& other) + { + if (this != &other) + { + if (Vector_Base

::values_!=nullptr) + { + Allocator::destroy(reinterpret_cast(Vector_Base

::values_)); + } + + Vector_Base

::length_= other.length_; + Vector_Base

::values_ = other.values_; + other.values_=nullptr; + other.length_ = 0; + } + + return(*this); + } + + /** + * @brief Fill a vector with an expression + * + * @tparam Derived Type representing the abstract syntax tree of the expression + * @param other The expression to evaluate + * @return A reference to the vector + */ + template + Vector& operator=(const _Expr&other) + { + eval(*this,other.derived(),Vector_Base

::length(),CURRENT_ARCH); + return(*this); + } + + /** + * @brief Fill a vector with a scalar + * + * @tparam T Scalar datatype + * @param other The scalar + * @return A reference to the vector + */ + template(),bool>::type = true> + Vector& operator=(const T other) + { + _Fill(*this,other,Vector_Base

::length(),CURRENT_ARCH); + return(*this); + } + + /** + * @brief Elementwise add an expression to a vector + * + * @tparam Derived Type representing the abstract syntax tree of the expression + * @param other The expression to evaluate + * @return A reference to the vector + */ + template + Vector& operator +=(const _Expr& other) + { + eval(*this,*this + other.derived(),Vector_Base

::length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise add a vector to a vector + * + * @param other The vector to add + * @return A reference to the vector + */ + Vector& operator +=(const Vector& other) + { + eval(*this,*this + other,Vector_Base

::length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise add a scalar to a vector + * + * @tparam P Scalar datatype + * @param other The scalar + * @return A reference to the vector + */ + Vector& operator +=(const P other) + { + eval(*this,*this + other,Vector_Base

::length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise subtract an expression to a vector + * + * @tparam Derived Type representing the abstract syntax tree of the expression + * @param other The expression to evaluate + * @return A reference to the vector + */ + template + Vector& operator -=(const _Expr& other) + { + eval(*this,*this - other.derived(),Vector_Base

::length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise subtract a vector to a vector + * + * @param other The vector to add + * @return A reference to the vector + */ + Vector& operator -=(const Vector& other) + { + eval(*this,*this - other,Vector_Base

::length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise subtract a scalar to a vector + * + * @tparam P Scalar datatype + * @param other The scalar + * @return A reference to the vector + */ + Vector& operator -=(const P other) + { + eval(*this,*this - other,Vector_Base

::length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise multiply an expression with a vector + * + * @tparam Derived Type representing the abstract syntax tree of the expression + * @param other The expression to evaluate + * @return A reference to the vector + */ + template + Vector& operator *=(const _Expr& other) + { + eval(*this,*this * other.derived(),Vector_Base

::length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise multiply a vector with a vector + * + * @param other The vector to add + * @return A reference to the vector + */ + Vector& operator *=(const Vector& other) + { + eval(*this,*this * other,Vector_Base

::length(),CURRENT_ARCH); + return(*this); + }; + + + /** + * @brief Elementwise multiply a scalar with a vector + * + * @tparam P Scalar datatype + * @param other The scalar + * @return A reference to the vector + */ + Vector& operator *=(const P other) + { + eval(*this,*this * other,Vector_Base

::length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Create a vector view + * + * @tparam S stride + * @param start Start index of view + * @param stop Stop index of view (first index after end of view) + * By default it is the length of the vector. + * @return The vector view + */ + template + VectorView sub(const index_t start=0,const index_t stop=-1) + { + if (stop<0) + { + return(VectorView(*this,start,Vector_Base

::length())); + } + else + { + return(VectorView(*this,start,stop)); + } + } + + template + const VectorView sub(const index_t start=0,const index_t stop=-1) const + { + if (stop<0) + { + return(VectorView(*this,start,Vector_Base

::length())); + } + else + { + return(VectorView(*this,start,stop)); + } + } + + + + virtual ~Vector() { + if (Vector_Base

::values_!=nullptr) + { + Allocator::destroy(reinterpret_cast(Vector_Base

::values_)); + } + } + +}; + +/*! @} */ + +} + diff --git a/dsppp/Include/dsppp/vector_view.hpp b/dsppp/Include/dsppp/vector_view.hpp new file mode 100644 index 00000000..25be3c19 --- /dev/null +++ b/dsppp/Include/dsppp/vector_view.hpp @@ -0,0 +1,940 @@ +// -*- C++ -*- +/** @file */ +#pragma once + +#include +#include +#include +#include +#include "common.hpp" +#include "arch.hpp" +#include +#include "number.hpp" +#include "forward.hpp" +#include "fusion.hpp" +#include "unroll.hpp" +#include "algorithms.hpp" +#include "vector_impl.hpp" + +namespace arm_cmsis_dsp { + +/** \addtogroup VECTOR + * @{ + */ + +/** @brief Vector view + * @tparam T Type of the scalar + * @tparam S Stride known at build time (default 1) + */ +template +struct VectorView +{ + + /* + + Start and stop are the position in the raw Vector_base pointer. + Stop is the first sample outside of the vector + + */ + VectorView() = delete; + + /** + * @brief Compute the number of elements in the vector view + * @param start Vector view start index + * @param stop Vector view stop index (first elemnt after the view) + * @return Vector dimension + * + */ + constexpr static vector_length_t compute_length(const index_t start,const index_t stop) + { + return(1+(stop-1 -start)/stride); + } + + /** + * @brief Create a vector view on a buffer + * @param v Buffer of samples (not owned by the view) + * @param start Start index of the view + * @param stop Stop index of the view (first elemnt after the view) + * + */ + explicit VectorView(T *v,const vector_length_t start,const vector_length_t stop): + v_(v+start),nb_samples_(compute_length(start,stop)){}; + + /** + * @brief Create a vector on a vector + * @param v Vector storage (not owned by the view) + * + */ + explicit VectorView(const Vector_Base &v): + v_(v.ptr()),nb_samples_(compute_length(0,v.length())){}; + + /** + * @brief Create a vector view on vector + * @param v Vector storage (not owned by the view) + * @param start Start index of the view + * @param 
stop Stop index of the view (first elemnt after the view) + * + */ + explicit VectorView(const Vector_Base &v,const index_t start,const index_t stop): + v_(v.ptr()+start),nb_samples_(compute_length(start,stop)){}; + + /** + * @brief Vector view dimension + * @return Number of elements + * + */ + vector_length_t length() const {return(nb_samples_);}; + + + /** + * @brief Pointer to view storage + * @return Pointer to start of storage + * + */ + T* ptr() const {return(v_);} + + /** + * @brief Pointer to view storage at index i + * @param i Index + * @return Pointer to storage at index i + * + * The stride is used to compute this pointer. + * The index is scaled by the stride. + */ + T* ptr(const index_t i) const {return(&v_[i*stride]);} + + /** + * @brief Pointer to view constant storage + * @return Pointer to start of constant storage + * + */ + const T* const_ptr() const {return(v_);} + + /** + * @brief Pointer to view constant storage at index i + * @param i Index + * @return Pointer to constant storage at index i + * + * The stride is used to compute this pointer. + * The index is scaled by the stride. + */ + const T* const_ptr(const index_t i) const {return(&v_[i*stride]);} + + /** + * @brief Element at index i + * @param i Index + * @return Reference to element + * + * The stride is used to compute this reference. + * The index is scaled by the stride. + */ + T& operator[](const index_t i) + { + return(v_[i*stride]); + } + + /** + * @brief Element at index i + * @param i Index + * @return Reference to element + * + * The stride is used to compute this reference. + * The index is scaled by the stride. + */ + T& operator[](const index_t i) const + { + return(v_[i*stride]); + } + +#if defined(HAS_VECTOR) + //! 
Type of vectors for a vector architecture and for scalar datatype P + using Vector = typename vector_traits::vector; + + /** + * @brief %Vector store at index i + * + * @param i index + * @param val Vector value + * + * On an architecture supporting vectors, if the scalar datatype T + * has a corresponding vector datatype, this function stores a vector + * value at index i in this vector datatype + */ + void vector_store(const index_t i,const Vector val) + { + inner::vstore1((typename std::remove_cv::type*)(&v_[i*stride]),val); + } + +#if defined(HAS_PREDICATED_LOOP) + /** + * @brief %Vector store at index i with predicated tail + * + * @param i index + * @param remaining Number of remaining samples in the loop + * @param val Vector value to write at index i with tail predication + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function stores a vector value at index i in this vector datatype + * with predication + */ + void vector_store_tail(const index_t i,const vector_length_t remaining,const Vector val) + { + inner::vstore1_z((typename std::remove_cv::type*)(&v_[i*stride]),val,remaining,inner::vctpq::mk(remaining)); + } + + /** + * @brief %Vector operation at index i with predicated tail + * + * @param i index + * @param remaining Number of remaining samples in the loop + * @return Result of operation + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function execute an operation at index i with predication. 
+ * In the case of a vector, this operation is a load + */ + Vector const vector_op_tail(const index_t i,const vector_length_t remaining) const + { + return(inner::vload1_z((typename std::remove_cv::type*)(&v_[i*stride]),remaining,inner::vctpq::mk(remaining))); + } +#endif + + /** + * @brief %Vector operation at index i + * + * @param i index + * @return Result of operation + * + * On an architecture supporting vectors, if the + * scalar datatype T has a corresponding vector datatype, this + * function execute an operation at index i. + * In the case of a vector, this operation is a load + */ + Vector const vector_op(const index_t i) const + { + return(inner::vload1((typename std::remove_cv::type*)(&v_[i*stride]))); + } +#endif + + virtual ~VectorView() {}; + + /** + * @brief Create a vector view from another view + * @param other the other vector view + * + * The new vector view will point to the same storage as the + * other vector view. No copy of element is occuring. + * To copy vector view content, the expr and copy operators + * are needed. + */ + VectorView(const VectorView& other): + v_(other.v_),nb_samples_(other.nb_samples_){}; + + + /** + * @brief Move a vector view to another view + * @param other the other vector view + * + * The new vector view will point to the same storage as the + * other vector view. No copy of element is occuring. 
+ * + * The other vector view is no more valid (points to null storage) + */ + VectorView(VectorView&& other) : + v_(std::move(other.v_)),nb_samples_(other.nb_samples_) + { + other.v_ = nullptr; + }; + +VectorView& operator=(const VectorView& other) = delete; +VectorView& operator=(VectorView&& other) = delete; + + + /** + * @brief Assign an expression to a vector view + * @tparam Derived the datatype representing the abstract syntax tree of the view + * @param other the expression + * @return the vector view + * + * Evaluate an expression an assign its result to the vector view + */ + template + VectorView& operator=(const _Expr&other) + { + eval(*this,other.derived(),length(),CURRENT_ARCH); + return(*this); + } + + + /** + * @brief Assign a scalar to a vector view + * @param val the scalar + * @return the vector view + * + */ + VectorView& operator=(const T val) + { + _Fill(*this,val,length(),CURRENT_ARCH); + + return(*this); + } + + /** + * @brief Elementwise add an expression to a vector view + * @tparam Derived the datatype representing the abstract syntax tree of the view + * @param other the expression + * @return the vector view + * + */ + template + VectorView& operator +=(const _Expr& other) + { + eval(*this,*this + other.derived(),length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise add a vector view to a vector view + * @param other the vector view to add + * @return the vector view + * + */ + VectorView& operator +=(const VectorView& other) + { + eval(*this,*this + other,length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise add a scalar to a vector view + * @param other the scalar + * @return the vector view + * + */ + VectorView& operator +=(const T other) + { + eval(*this,*this + other,length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise subtract an expression to a vector view + * @tparam Derived the datatype representing the abstract syntax tree of the view + * @param other the 
expression + * @return the vector view + * + */ + template + VectorView& operator -=(const _Expr& other) + { + eval(*this,*this - other.derived(),length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise subtract a vector view to a vector view + * @param other the vector view to add + * @return the vector view + * + */ + VectorView& operator -=(const VectorView& other) + { + eval(*this,*this - other,length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise subtract a scalar to a vector view + * @param other the scalar + * @return the vector view + * + */ + VectorView& operator -=(const T other) + { + eval(*this,*this - other,length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise multiply an expression to a vector view + * @tparam Derived the datatype representing the abstract syntax tree of the view + * @param other the expression + * @return the vector view + * + */ + template + VectorView& operator *=(const _Expr& other) + { + eval(*this,*this * other.derived(),length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise multiply a vector view to a vector view + * @param other the vector view to add + * @return the vector view + * + */ + VectorView& operator *=(const VectorView& other) + { + eval(*this,*this * other,length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise multiply a scalar to a vector view + * @param other the scalar + * @return the vector view + * + */ + VectorView& operator *=(const T other) + { + eval(*this,*this * other,length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Display the vector view content for debug purpose + * @param stream Output stream + * @param other The vector view to display + * @return the stream + * + */ + friend std::ostream& operator<< (std::ostream& stream, const VectorView& other) { + constexpr int nb = 10; + int i=0; + for(index_t k=0;k + VectorView sub(const index_t start=0,const index_t stop=-1) + { + if 
(stop < 0) + { + return(VectorView(v_,stride*start,stride*length())); + } + else + { + return(VectorView(v_,stride*start,stride*stop)); + } + } + + /** + * @brief Create a constant sub vector (a view of a view) + * @tparam S stride known at build time + * @param start Start index + * @param stop Stop index (first element after the view) + * By default it is the vector view length + * @return the vector view + * + */ + template + const VectorView sub(const index_t start=0,const index_t stop=-1) const + { + if (stop < 0) + { + return(VectorView(v_,stride*start,stride*length())); + } + else + { + return(VectorView(v_,stride*start,stride*stop)); + } + } + + +protected: + T* const v_; + const vector_length_t nb_samples_; +}; + +/** @brief Vector view with dynamic stride (not known at build time) + * @tparam T Type of the scalar + */ +template +struct VectorView +{ + + /* + + Start and stop are the position in the raw Vector_base pointer. + Stop is the first sample outside of the vector + + */ + VectorView() = delete; + + /** + * @brief Compute the number of elements in the vector view + * @param start Vector view start index + * @param stop Vector view stop index (first elemnt after the view) + * @param stride Stride (only known at runtime) + * @return Vector dimension + * + */ + vector_length_t compute_length(const index_t start,const index_t stop,const index_t stride) const + { + return(1+(stop-1 -start)/stride); + } + + /** + * @brief Create a vector view on a buffer + * @param v Buffer of samples (not owned by the view) + * @param start Start index of the view + * @param stop Stop index of the view (first elemnt after the view) + * @param stride Stride (only known at runtime) + * + */ + explicit VectorView(T *v,const index_t start,const index_t stop,const index_t stride): + v_(v+start),nb_samples_(compute_length(start,stop,stride)),stride_(stride){}; + + + /** + * @brief Create a vector view on a vector + * @param v Vector owning the storage (not owned by the view) 
+ * @param stride Stride (only known at runtime) + * + * start is 0 + * stop is defined by the length of the vector + * + */ + explicit VectorView(const Vector_Base &v,const index_t stride): + v_(v.ptr()),nb_samples_(compute_length(0,v.length(),stride)),stride_(stride){}; + + /** + * @brief Create a vector view on a vector + * @param v Vector owning the storage (not owned by the view) + * @param start Start index of the view + * @param stop Stop index + * @param stride Stride (only known at runtime) + * + * + */ + explicit VectorView(const Vector_Base &v,const index_t start,const index_t stop,const index_t stride): + v_(v.ptr()+start),nb_samples_(compute_length(start,stop,stride)),stride_(stride){}; + + /** + * @brief Vector view dimension + * @return Number of elements + * + */ + vector_length_t length() const {return(nb_samples_);}; + + + /** + * @brief Pointer to view storage + * @return Pointer to start of storage + * + */ + T* ptr() const {return(v_);} + + /** + * @brief Pointer to view storage at index i + * @param i Index + * @return Pointer to storage at index i + * + * The stride is used to compute this pointer. + * The index is scaled by the stride. + */ + T* ptr(const index_t i) const {return(&v_[i*stride_]);} + + + /** + * @brief Pointer to view constant storage + * @return Pointer to start of constant storage + * + */ + const T* const_ptr() const {return(v_);} + + /** + * @brief Pointer to view constant storage at index i + * @param i Index + * @return Pointer to constant storage at index i + * + * The stride is used to compute this pointer. + * The index is scaled by the stride. + */ + const T* const_ptr(const index_t i) const {return(&v_[i*stride_]);} + + /** + * @brief Element at index i + * @param i Index + * @return Reference to element + * + * The stride is used to compute this reference. + * The index is scaled by the stride. 
+ */ + T& operator[](index_t i) + { + return(v_[i*stride_]); + } + + /** + * @brief Element at index i + * @param i Index + * @return Reference to element + * + * The stride is used to compute this reference. + * The index is scaled by the stride. + */ + T& operator[](index_t i) const + { + return(v_[i*stride_]); + } + +#if defined(HAS_VECTOR) + //! Type of vectors for a vector architecture and for scalar datatype P + using Vector = typename vector_traits::vector; + + /** + * @brief %Vector store at index i + * + * @param i index + * @param val Vector value + * + * On an architecture supporting vectors, if the scalar datatype T + * has a corresponding vector datatype, this function stores a vector + * value at index i in this vector datatype + */ + void vector_store(index_t i,Vector val) + { + inner::vstore1((typename std::remove_cv::type*)(&v_[i*stride_]),stride_,val); + } + +#if defined(HAS_PREDICATED_LOOP) + /** + * @brief %Vector store at index i with predicated tail + * + * @param i index + * @param remaining Number of remaining samples in the loop + * @param val Vector value to write at index i with tail predication + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function stores a vector value at index i in this vector datatype + * with predication + */ + void vector_store_tail(index_t i,vector_length_t remaining,Vector val) + { + inner::vstore1_z((typename std::remove_cv::type*)(&v_[i*stride_]),stride_,val,remaining,inner::vctpq::mk(remaining)); + } + + /** + * @brief %Vector operation at index i with predicated tail + * + * @param i index + * @param remaining Number of remaining samples in the loop + * @return Result of operation + * + * On an architecture supporting vectors and predicated loops, if the + * scalar datatype T has a corresponding vector datatype, this + * function execute an operation at index i with predication. 
+ * In the case of a vector, this operation is a load + */ + Vector const vector_op_tail(index_t i,vector_length_t remaining) const + { + return(inner::vload1_z((typename std::remove_cv::type*)(&v_[i*stride_]),stride_,remaining,inner::vctpq::mk(remaining))); + } +#endif + + /** + * @brief %Vector operation at index i + * + * @param i index + * @return Result of operation + * + * On an architecture supporting vectors, if the + * scalar datatype T has a corresponding vector datatype, this + * function execute an operation at index i. + * In the case of a vector, this operation is a load + */ + Vector const vector_op(index_t i) const + { + return(inner::vload1((typename std::remove_cv::type*)(&v_[i*stride_]),stride_)); + } +#endif + + virtual ~VectorView() {}; + + /** + * @brief Create a vector view from another view + * @param other the other vector view + * + * The new vector view will point to the same storage as the + * other vector view. No copy of element is occuring. + * To copy vector view content, the expr and copy operators + * are needed. + */ + VectorView(const VectorView& other): + v_(other.v_),nb_samples_(other.nb_samples_),stride_(other.stride_){}; + + + /** + * @brief Move a vector view to another view + * @param other the other vector view + * + * The new vector view will point to the same storage as the + * other vector view. No copy of element is occuring. 
+ * + * The other vector view is no more valid (points to null storage) + */ + VectorView(VectorView&& other) : + v_(std::move(other.v_)),nb_samples_(other.nb_samples_),stride_(other.stride_) + { + other.v_ = nullptr; + }; + +VectorView& operator=(const VectorView& other) = delete; +VectorView& operator=(VectorView&& other) = delete; + + + + /** + * @brief Assign an expression to a vector view + * @tparam Derived the datatype representing the abstract syntax tree of the view + * @param other the expression + * @return the vector view + * + * Evaluate an expression an assign its result to the vector view + */ + template + VectorView& operator=(const _Expr&other) + { + eval(*this,other.derived(),length(),CURRENT_ARCH); + return(*this); + } + + + /** + * @brief Assign a scalar to a vector view + * @param val the scalar + * @return the vector view + * + */ + VectorView& operator=(const T val) + { + _Fill(*this,val,length(),CURRENT_ARCH); + + return(*this); + } + + /** + * @brief Elementwise add an expression to a vector view + * @tparam Derived the datatype representing the abstract syntax tree of the view + * @param other the expression + * @return the vector view + * + */ + template + VectorView& operator +=(const _Expr& other) + { + eval(*this,*this + other.derived(),length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise add a vector view to a vector view + * @param other the vector view to add + * @return the vector view + * + */ + VectorView& operator +=(const VectorView& other) + { + eval(*this,*this + other,length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise add a scalar to a vector view + * @param other the scalar + * @return the vector view + * + */ + VectorView& operator +=(const T other) + { + eval(*this,*this + other,length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise subtract an expression to a vector view + * @tparam Derived the datatype representing the abstract syntax tree of the 
view + * @param other the expression + * @return the vector view + * + */ + template + VectorView& operator -=(const _Expr& other) + { + eval(*this,*this - other.derived(),length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise subtract a vector view to a vector view + * @param other the vector view to add + * @return the vector view + * + */ + VectorView& operator -=(const VectorView& other) + { + eval(*this,*this - other,length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise subtract a scalar to a vector view + * @param other the scalar + * @return the vector view + * + */ + VectorView& operator -=(const T other) + { + eval(*this,*this - other,length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise multiply an expression to a vector view + * @tparam Derived the datatype representing the abstract syntax tree of the view + * @param other the expression + * @return the vector view + * + */ + template + VectorView& operator *=(const _Expr& other) + { + eval(*this,*this * other.derived(),length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise multiply a vector view to a vector view + * @param other the vector view to add + * @return the vector view + * + */ + VectorView& operator *=(const VectorView& other) + { + eval(*this,*this * other,length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Elementwise multiply a scalar to a vector view + * @param other the scalar + * @return the vector view + * + */ + VectorView& operator *=(const T other) + { + eval(*this,*this * other,length(),CURRENT_ARCH); + return(*this); + }; + + /** + * @brief Display the vector view content for debug purpose + * @param stream Output stream + * @param other The vector view to display + * @return the stream + * + */ + friend std::ostream& operator<< (std::ostream& stream, const VectorView& other) { + constexpr int nb = 10; + int i=0; + for(index_t k=0;k + VectorView sub(const index_t start=0,const 
index_t stop=-1) + { + if (stop<0) + { + return(VectorView(v_,stride()*start,stride()*length(),stride()*S)); + } + else + { + return(VectorView(v_,stride()*start,stride()*stop,stride()*S)); + } + } + + /** + * @brief Create a constant sub vector (a view of a view) + * @tparam S stride known at build time + * @param start Start index + * @param stop Stop index (first element after the view) + * By default it is the vector view length + * @return the vector view + * + */ + template + const VectorView sub(const index_t start=0,const index_t stop=-1) const + { + if (stop<0) + { + return(VectorView(v_,stride()*start,stride()*length(),stride()*S)); + } + else + { + return(VectorView(v_,stride()*start,stride()*stop,stride()*S)); + } + } + +protected: + T* const v_; + const vector_length_t nb_samples_; + const index_t stride_; +}; + +/*! @} */ + +} + diff --git a/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_ac6.sct b/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_ac6.sct new file mode 100644 index 00000000..0f499b2c --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_ac6.sct @@ -0,0 +1,80 @@ +#! armclang -E --target=arm-arm-none-eabi -mcpu=cortex-m0+ -xc +; command above MUST be in first line (no comment above!) 
+ +/* +;-------- <<< Use Configuration Wizard in Context Menu >>> ------------------- +*/ + +/*--------------------- Flash Configuration ---------------------------------- +; Flash Configuration +; Flash Base Address <0x0-0xFFFFFFFF:8> +; Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __ROM_BASE 0x00000000 +#define __ROM_SIZE 0x00080000 + +/*--------------------- Embedded RAM Configuration --------------------------- +; RAM Configuration +; RAM Base Address <0x0-0xFFFFFFFF:8> +; RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __RAM_BASE 0x20000000 +#define __RAM_SIZE 0x00040000 + +/*--------------------- Stack / Heap Configuration --------------------------- +; Stack / Heap Configuration +; Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +; Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __STACK_SIZE 0x00000200 +#define __HEAP_SIZE 0x00000C00 + +/* +;------------- <<< end of configuration section >>> --------------------------- +*/ + + +/*---------------------------------------------------------------------------- + User Stack & Heap boundary definition + *----------------------------------------------------------------------------*/ +#define __STACK_TOP (__RAM_BASE + __RAM_SIZE) /* starts at end of RAM */ +#define __HEAP_BASE (AlignExpr(+0, 8)) /* starts after RW_RAM section, 8 byte aligned */ + + +/*---------------------------------------------------------------------------- + Scatter File Definitions definition + *----------------------------------------------------------------------------*/ +#define __RO_BASE __ROM_BASE +#define __RO_SIZE __ROM_SIZE + +#define __RW_BASE __RAM_BASE +#define __RW_SIZE (__RAM_SIZE - __STACK_SIZE - __HEAP_SIZE) + + +LR_ROM __RO_BASE __RO_SIZE { ; load region size_region + ER_ROM 
__RO_BASE __RO_SIZE { ; load address = execution address + *.o (RESET, +First) + *(InRoot$$Sections) + .ANY (+RO) + .ANY (+XO) + } + + RW_NOINIT __RW_BASE UNINIT __RW_SIZE { + *(.bss.noinit) + } + + RW_RAM AlignExpr(+0, 8) (__RW_SIZE - AlignExpr(ImageLength(RW_NOINIT), 8)) { + *(+RW +ZI) + } + +#if __HEAP_SIZE > 0 + ARM_LIB_HEAP __HEAP_BASE EMPTY __HEAP_SIZE { ; Reserve empty region for heap + } +#endif + + ARM_LIB_STACK __STACK_TOP EMPTY -__STACK_SIZE { ; Reserve empty region for stack + } +} diff --git a/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_ac6.sct.base@1.0.0 b/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_ac6.sct.base@1.0.0 new file mode 100644 index 00000000..0f499b2c --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_ac6.sct.base@1.0.0 @@ -0,0 +1,80 @@ +#! armclang -E --target=arm-arm-none-eabi -mcpu=cortex-m0+ -xc +; command above MUST be in first line (no comment above!) + +/* +;-------- <<< Use Configuration Wizard in Context Menu >>> ------------------- +*/ + +/*--------------------- Flash Configuration ---------------------------------- +; Flash Configuration +; Flash Base Address <0x0-0xFFFFFFFF:8> +; Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __ROM_BASE 0x00000000 +#define __ROM_SIZE 0x00080000 + +/*--------------------- Embedded RAM Configuration --------------------------- +; RAM Configuration +; RAM Base Address <0x0-0xFFFFFFFF:8> +; RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __RAM_BASE 0x20000000 +#define __RAM_SIZE 0x00040000 + +/*--------------------- Stack / Heap Configuration --------------------------- +; Stack / Heap Configuration +; Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +; Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __STACK_SIZE 0x00000200 +#define __HEAP_SIZE 0x00000C00 + +/* 
+;------------- <<< end of configuration section >>> --------------------------- +*/ + + +/*---------------------------------------------------------------------------- + User Stack & Heap boundary definition + *----------------------------------------------------------------------------*/ +#define __STACK_TOP (__RAM_BASE + __RAM_SIZE) /* starts at end of RAM */ +#define __HEAP_BASE (AlignExpr(+0, 8)) /* starts after RW_RAM section, 8 byte aligned */ + + +/*---------------------------------------------------------------------------- + Scatter File Definitions definition + *----------------------------------------------------------------------------*/ +#define __RO_BASE __ROM_BASE +#define __RO_SIZE __ROM_SIZE + +#define __RW_BASE __RAM_BASE +#define __RW_SIZE (__RAM_SIZE - __STACK_SIZE - __HEAP_SIZE) + + +LR_ROM __RO_BASE __RO_SIZE { ; load region size_region + ER_ROM __RO_BASE __RO_SIZE { ; load address = execution address + *.o (RESET, +First) + *(InRoot$$Sections) + .ANY (+RO) + .ANY (+XO) + } + + RW_NOINIT __RW_BASE UNINIT __RW_SIZE { + *(.bss.noinit) + } + + RW_RAM AlignExpr(+0, 8) (__RW_SIZE - AlignExpr(ImageLength(RW_NOINIT), 8)) { + *(+RW +ZI) + } + +#if __HEAP_SIZE > 0 + ARM_LIB_HEAP __HEAP_BASE EMPTY __HEAP_SIZE { ; Reserve empty region for heap + } +#endif + + ARM_LIB_STACK __STACK_TOP EMPTY -__STACK_SIZE { ; Reserve empty region for stack + } +} diff --git a/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_gcc.ld b/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_gcc.ld new file mode 100644 index 00000000..93ed813c --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_gcc.ld @@ -0,0 +1,263 @@ +/* + *-------- <<< Use Configuration Wizard in Context Menu >>> ------------------- + */ + +/*---------------------- Flash Configuration ---------------------------------- + Flash Configuration + Flash Base Address <0x0-0xFFFFFFFF:8> + Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__ROM_BASE = 0x00000000; 
+__ROM_SIZE = 0x00040000; + +/*--------------------- Embedded RAM Configuration ---------------------------- + RAM Configuration + RAM Base Address <0x0-0xFFFFFFFF:8> + RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__RAM_BASE = 0x20000000; +__RAM_SIZE = 0x00020000; + +/*--------------------- Stack / Heap Configuration ---------------------------- + Stack / Heap Configuration + Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> + Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__STACK_SIZE = 0x00000400; +__HEAP_SIZE = 0x00000C00; + +/* + *-------------------- <<< end of configuration section >>> ------------------- + */ + +MEMORY +{ + FLASH (rx) : ORIGIN = __ROM_BASE, LENGTH = __ROM_SIZE + RAM (rwx) : ORIGIN = __RAM_BASE, LENGTH = __RAM_SIZE +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. 
+ * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > FLASH + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > FLASH + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > FLASH + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > FLASH + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. 
+ LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > FLASH + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM AT > FLASH + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM2 AT > FLASH +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM AT > RAM + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM2 AT > RAM2 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . 
= ALIGN(8); + __HeapLimit = .; + } > RAM + + .stack (ORIGIN(RAM) + LENGTH(RAM) - __STACK_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM + PROVIDE(__stack = __StackTop); + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_gcc.ld.base@2.2.0 b/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_gcc.ld.base@2.2.0 new file mode 100644 index 00000000..93ed813c --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/ARMCM0plus_gcc.ld.base@2.2.0 @@ -0,0 +1,263 @@ +/* + *-------- <<< Use Configuration Wizard in Context Menu >>> ------------------- + */ + +/*---------------------- Flash Configuration ---------------------------------- + Flash Configuration + Flash Base Address <0x0-0xFFFFFFFF:8> + Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__ROM_BASE = 0x00000000; +__ROM_SIZE = 0x00040000; + +/*--------------------- Embedded RAM Configuration ---------------------------- + RAM Configuration + RAM Base Address <0x0-0xFFFFFFFF:8> + RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__RAM_BASE = 0x20000000; +__RAM_SIZE = 0x00020000; + +/*--------------------- Stack / Heap Configuration ---------------------------- + Stack / Heap Configuration + Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> + Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__STACK_SIZE = 0x00000400; +__HEAP_SIZE = 0x00000C00; + +/* + *-------------------- <<< end of configuration section >>> ------------------- + */ + +MEMORY +{ + FLASH (rx) : ORIGIN = __ROM_BASE, LENGTH = __ROM_SIZE + RAM (rwx) : ORIGIN = __RAM_BASE, LENGTH = __RAM_SIZE +} + +/* Linker script to place sections and symbol values. 
Should be used together + * with other linker script that defines memory regions FLASH and RAM. + * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > FLASH + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > FLASH + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > FLASH + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > FLASH + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. 
+ LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > FLASH + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM AT > FLASH + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM2 AT > FLASH +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM AT > RAM + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM2 AT > RAM2 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . 
= ALIGN(8); + __HeapLimit = .; + } > RAM + + .stack (ORIGIN(RAM) + LENGTH(RAM) - __STACK_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM + PROVIDE(__stack = __StackTop); + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/RTE/Device/ARMCM0P/ac6_linker_script.sct b/dsppp/RTE/Device/ARMCM0P/ac6_linker_script.sct new file mode 100644 index 00000000..4d6e579d --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/ac6_linker_script.sct @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/*---------------------------------------------------------------------------- + Scatter File Definitions definition + *----------------------------------------------------------------------------*/ + +LR_ROM0 __ROM0_BASE __ROM0_SIZE { + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + ER_CMSE_VENEER __ROM0_BASE+__ROM0_SIZE -__ROM0_SIZE { + *(Veneer$$CMSE) + } + #define ER_CMSE_VENEER_SIZE AlignExpr(ImageLength(ER_CMSE_VENEER), 8) +#else + #define ER_CMSE_VENEER_SIZE 0 +#endif + + ER_ROM0 __ROM0_BASE (__ROM0_SIZE - ER_CMSE_VENEER_SIZE) { + *.o (RESET, +First) + *(InRoot$$Sections) + *(+RO +XO) + } + + RW_NOINIT __RAM0_BASE UNINIT (__RAM0_SIZE - __HEAP_SIZE - __STACK_SIZE) { + *(.bss.noinit) + } + + RW_RAM0 AlignExpr(+0, 8) (__RAM0_SIZE - __HEAP_SIZE - __STACK_SIZE - AlignExpr(ImageLength(RW_NOINIT), 8)) { + *(+RW +ZI) + } + +#if __HEAP_SIZE > 0 + ARM_LIB_HEAP (AlignExpr(+0, 8)) EMPTY __HEAP_SIZE { ; Reserve empty region for heap + } +#endif + + ARM_LIB_STACK (__RAM0_BASE + __RAM0_SIZE - __STACKSEAL_SIZE) EMPTY -__STACK_SIZE { ; Reserve empty region for stack + } + +#if __STACKSEAL_SIZE > 0 + STACKSEAL +0 EMPTY 8 { ; Reserve empty region for stack seal immediately after stack + } +#endif + +#if __RAM1_SIZE > 0 + RW_RAM1 __RAM1_BASE __RAM1_SIZE { + .ANY (+RW +ZI) + } +#endif + +#if __RAM2_SIZE > 0 + RW_RAM2 __RAM2_BASE __RAM2_SIZE { + .ANY (+RW +ZI) + } +#endif + +#if __RAM3_SIZE > 0 + RW_RAM3 __RAM3_BASE __RAM3_SIZE { + .ANY (+RW +ZI) + } +#endif +} + +#if __ROM1_SIZE > 0 +LR_ROM1 __ROM1_BASE __ROM1_SIZE { + ER_ROM1 +0 __ROM1_SIZE { + .ANY (+RO +XO) + } +} +#endif + +#if __ROM2_SIZE > 0 +LR_ROM2 __ROM2_BASE 
__ROM2_SIZE { + ER_ROM2 +0 __ROM2_SIZE { + .ANY (+RO +XO) + } +} +#endif + +#if __ROM3_SIZE > 0 +LR_ROM3 __ROM3_BASE __ROM3_SIZE { + ER_ROM3 +0 __ROM3_SIZE { + .ANY (+RO +XO) + } +} +#endif diff --git a/dsppp/RTE/Device/ARMCM0P/clang_linker_script.ld b/dsppp/RTE/Device/ARMCM0P/clang_linker_script.ld new file mode 100644 index 00000000..40f955c1 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/clang_linker_script.ld @@ -0,0 +1,353 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright © 2019 Keith Packard + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx!w) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx!w) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx!w) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx!w) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (w!rx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (w!rx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (w!rx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (w!rx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +ENTRY(Reset_Handler) + +PHDRS +{ + text PT_LOAD; + ram PT_LOAD; + ram_init PT_LOAD; + tls PT_TLS; +} + +SECTIONS +{ + .init : { + KEEP (*(.vectors)) + KEEP (*(.text.init.enter)) + KEEP (*(.data.init.enter)) + KEEP (*(SORT_BY_NAME(.init) SORT_BY_NAME(.init.*))) 
+ } >ROM0 AT>ROM0 :text + + .text : { + + /* code */ + *(.text.unlikely .text.unlikely.*) + *(.text.startup .text.startup.*) + *(.text .text.* .opd .opd.*) + *(.gnu.linkonce.t.*) + KEEP (*(.fini .fini.*)) + __text_end = .; + + PROVIDE (__etext = __text_end); + PROVIDE (_etext = __text_end); + PROVIDE (etext = __text_end); + + /* read-only data */ + *(.rdata) + *(.rodata .rodata.*) + *(.gnu.linkonce.r.*) + + *(.srodata.cst16) + *(.srodata.cst8) + *(.srodata.cst4) + *(.srodata.cst2) + *(.srodata .srodata.*) + *(.data.rel.ro .data.rel.ro.*) + *(.got .got.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + /* lists of constructors and destructors */ + PROVIDE_HIDDEN ( __preinit_array_start = . ); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN ( __preinit_array_end = . ); + + PROVIDE_HIDDEN ( __init_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) + KEEP (*(.init_array .ctors)) + PROVIDE_HIDDEN ( __init_array_end = . ); + + PROVIDE_HIDDEN ( __fini_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) + KEEP (*(.fini_array .dtors)) + PROVIDE_HIDDEN ( __fini_array_end = . ); + + } >ROM0 AT>ROM0 :text + + .toc : { + *(.toc .toc.*) + } >ROM0 AT>ROM0 :text + + /* additional sections when compiling with C++ exception support */ + + .except_ordered : { + *(.gcc_except_table *.gcc_except_table.*) + KEEP (*(.eh_frame .eh_frame.*)) + *(.ARM.extab* .gnu.linkonce.armextab.*) + } >ROM0 AT>ROM0 :text + + .except_unordered : { + . 
= ALIGN(8); + + PROVIDE(__exidx_start = .); + *(.ARM.exidx*) + PROVIDE(__exidx_end = .); + } >ROM0 AT>ROM0 :text + + + /* + * Data values which are preserved across reset + */ + .preserve (NOLOAD) : { + PROVIDE(__preserve_start__ = .); + KEEP(*(SORT_BY_NAME(.preserve.*))) + KEEP(*(.preserve)) + PROVIDE(__preserve_end__ = .); + } >RAM0 AT>RAM0 :ram + + .data : { + *(.data .data.*) + *(.gnu.linkonce.d.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + PROVIDE( __global_pointer$ = . + 0x800 ); + *(.sdata .sdata.* .sdata2.*) + *(.gnu.linkonce.s.*) + } >RAM0 AT>ROM0 :ram_init + PROVIDE(__data_start = ADDR(.data)); + PROVIDE(__data_source = LOADADDR(.data)); + + /* Thread local initialized data. This gets + * space allocated as it is expected to be placed + * in ram to be used as a template for TLS data blocks + * allocated at runtime. We're slightly abusing that + * by placing the data in flash where it will be copied + * into the allocate ram addresses by the existing + * data initialization code in crt0 + */ + .tdata : { + *(.tdata .tdata.* .gnu.linkonce.td.*) + PROVIDE(__data_end = .); + PROVIDE(__tdata_end = .); + } >RAM0 AT>ROM0 :tls :ram_init + PROVIDE( __tls_base = ADDR(.tdata)); + PROVIDE( __tdata_start = ADDR(.tdata)); + PROVIDE( __tdata_source = LOADADDR(.tdata) ); + PROVIDE( __tdata_source_end = LOADADDR(.tdata) + SIZEOF(.tdata) ); + PROVIDE( __data_source_end = __tdata_source_end ); + PROVIDE( __tdata_size = SIZEOF(.tdata) ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata),ALIGNOF(.tbss)) ); + + PROVIDE( __edata = __data_end ); + PROVIDE( _edata = __data_end ); + PROVIDE( edata = __data_end ); + PROVIDE( __data_size = __data_end - __data_start ); + PROVIDE( __data_source_size = __data_source_end - __data_source ); + + .tbss (NOLOAD) : { + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + PROVIDE( __tls_end = . ); + PROVIDE( __tbss_end = . 
); + } >RAM0 AT>RAM0 :tls :ram + PROVIDE( __bss_start = ADDR(.tbss)); + PROVIDE( __tbss_start = ADDR(.tbss)); + PROVIDE( __tbss_offset = ADDR(.tbss) - ADDR(.tdata) ); + PROVIDE( __tbss_size = SIZEOF(.tbss) ); + PROVIDE( __tls_size = __tls_end - __tls_base ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + PROVIDE( __arm64_tls_tcb_offset = MAX(16, __tls_align) ); + + /* + * The linker special cases .tbss segments which are + * identified as segments which are not loaded and are + * thread_local. + * + * For these segments, the linker does not advance 'dot' + * across them. We actually need memory allocated for tbss, + * so we create a special segment here just to make room + */ + /* + .tbss_space (NOLOAD) : { + . = ADDR(.tbss); + . = . + SIZEOF(.tbss); + } >RAM0 AT>RAM0 :ram + */ + + .bss (NOLOAD) : { + *(.sbss*) + *(.gnu.linkonce.sb.*) + *(.bss .bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + + /* Align the heap */ + . = ALIGN(8); + __bss_end = .; + } >RAM0 AT>RAM0 :ram + PROVIDE( __non_tls_bss_start = ADDR(.bss) ); + PROVIDE( __end = __bss_end ); + PROVIDE( _end = __bss_end ); + PROVIDE( end = __bss_end ); + PROVIDE( __bss_size = __bss_end - __bss_start ); + + /* Make the rest of memory available for heap storage */ + PROVIDE (__heap_start = __end); +#ifdef __HEAP_SIZE + PROVIDE (__heap_end = __heap_start + __HEAP_SIZE); + PROVIDE (__heap_size = __HEAP_SIZE); +#else + PROVIDE (__heap_end = __stack - __STACK_SIZE); + PROVIDE (__heap_size = __heap_end - __heap_start); +#endif + .heap (NOLOAD) : { + . += __heap_size; + } >RAM0 :ram + + /* Define a stack region to make sure it fits in memory */ + PROVIDE(__stack = ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE); + PROVIDE(__stack_limit = __stack - __STACK_SIZE); + .stack (__stack_limit) (NOLOAD) : { + . += __STACK_SIZE; + } >RAM0 :ram + +#if __STACKSEAL_SIZE > 0 + PROVIDE(__stack_seal = __stack) + .stackseal (__stack) (NOLOAD) : + { + . 
+= __STACKSEAL_SIZE; + } >RAM0 :ram +#endif + + /* Throw away C++ exception handling information */ + + /* + + /DISCARD/ : { + *(.note .note.*) + *(.eh_frame .eh_frame.*) + *(.ARM.extab* .gnu.linkonce.armextab.*) + *(.ARM.exidx*) + } + + */ + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1. */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions. */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2. */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2. */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions. */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3. */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + /* DWARF 5. 
*/ + .debug_addr 0 : { *(.debug_addr) } + .debug_line_str 0 : { *(.debug_line_str) } + .debug_loclists 0 : { *(.debug_loclists) } + .debug_macro 0 : { *(.debug_macro) } + .debug_names 0 : { *(.debug_names) } + .debug_rnglists 0 : { *(.debug_rnglists) } + .debug_str_offsets 0 : { *(.debug_str_offsets) } + .debug_sup 0 : { *(.debug_sup) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } +} +/* + * Check that sections that are copied from flash to RAM have matching + * padding, so that a single memcpy() of __data_size copies the correct bytes. + */ +ASSERT( __data_size == __data_source_size, + "ERROR: .data/.tdata flash size does not match RAM size"); diff --git a/dsppp/RTE/Device/ARMCM0P/gcc_linker_script.ld b/dsppp/RTE/Device/ARMCM0P/gcc_linker_script.ld new file mode 100644 index 00000000..a018e5d4 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/gcc_linker_script.ld @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (rwx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (rwx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (rwx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (rwx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. 
+ * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > ROM0 + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + .gnu.sgstubs : + { + . = ALIGN(32); + } > ROM0 +#endif + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > ROM0 + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > ROM0 + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > ROM0 + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. 
+ LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > ROM0 + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM0 AT > ROM0 + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM1 AT > ROM0 +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM0 AT > RAM0 + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM1 AT > RAM1 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . 
= ALIGN(8); + __HeapLimit = .; + } > RAM0 + + .stack (ORIGIN(RAM0) + LENGTH(RAM0) - __STACK_SIZE - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM0 + PROVIDE(__stack = __StackTop); + +#if __STACKSEAL_SIZE > 0 + .stackseal (ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackSeal = .; + . = . + 8; + . = ALIGN(8); + } > RAM0 +#endif + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/RTE/Device/ARMCM0P/regions_ARMCM0P.h b/dsppp/RTE/Device/ARMCM0P/regions_ARMCM0P.h new file mode 100644 index 00000000..c9b457cb --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/regions_ARMCM0P.h @@ -0,0 +1,60 @@ +#ifndef REGIONS_ARMCM0P_H +#define REGIONS_ARMCM0P_H + + +//-------- <<< Use Configuration Wizard in Context Menu >>> -------------------- + +// Device pack: ARM::Cortex_DFP@1.0.0 +// Device pack used to generate this file + +// ROM Configuration +// ======================= +// ROM=<__ROM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x00000000 +#define __ROM0_BASE 0x00000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00040000 +#define __ROM0_SIZE 0x00040000 +// Default region +// Enables memory region globally for the application. +#define __ROM0_DEFAULT 1 +// Startup +// Selects region to be used for startup code. +#define __ROM0_STARTUP 1 +// + +// + +// RAM Configuration +// ======================= +// RAM=<__RAM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x20000000 +#define __RAM0_BASE 0x20000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM0_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. 
+#define __RAM0_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM0_NOINIT 0 +// + +// + +// Stack / Heap Configuration +// Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +// Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +#define __STACK_SIZE 0x00000200 +#define __HEAP_SIZE 0x00000C00 +// + + +#endif /* REGIONS_ARMCM0P_H */ diff --git a/dsppp/RTE/Device/ARMCM0P/startup_ARMCM0plus.c b/dsppp/RTE/Device/ARMCM0P/startup_ARMCM0plus.c new file mode 100644 index 00000000..25b20245 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/startup_ARMCM0plus.c @@ -0,0 +1,146 @@ +/****************************************************************************** + * @file startup_ARMCM0plus.c + * @brief CMSIS-Core(M) Device Startup File for a Cortex-M0+ Device + * @version V3.0.0 + * @date 06. April 2023 + ******************************************************************************/ +/* + * Copyright (c) 2009-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined (ARMCM0P) + #include "ARMCM0plus.h" +#else + #error device not specified! 
+#endif + +/*---------------------------------------------------------------------------- + External References + *----------------------------------------------------------------------------*/ +extern uint32_t __INITIAL_SP; + +extern __NO_RETURN void __PROGRAM_START(void); + +/*---------------------------------------------------------------------------- + Internal References + *----------------------------------------------------------------------------*/ +__NO_RETURN void Reset_Handler (void); + void Default_Handler(void); + +/*---------------------------------------------------------------------------- + Exception / Interrupt Handler + *----------------------------------------------------------------------------*/ +/* Exceptions */ +void NMI_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void HardFault_Handler (void) __attribute__ ((weak)); +void SVC_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void PendSV_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void SysTick_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); + +void Interrupt0_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt1_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt2_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt3_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt4_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt5_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt6_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt7_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt8_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt9_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); + + 
+/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#endif + +extern const VECTOR_TABLE_Type __VECTOR_TABLE[48]; + const VECTOR_TABLE_Type __VECTOR_TABLE[48] __VECTOR_TABLE_ATTRIBUTE = { + (VECTOR_TABLE_Type)(&__INITIAL_SP), /* Initial Stack Pointer */ + Reset_Handler, /* Reset Handler */ + NMI_Handler, /* -14 NMI Handler */ + HardFault_Handler, /* -13 Hard Fault Handler */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + SVC_Handler, /* -5 SVCall Handler */ + 0, /* Reserved */ + 0, /* Reserved */ + PendSV_Handler, /* -2 PendSV Handler */ + SysTick_Handler, /* -1 SysTick Handler */ + + /* Interrupts */ + Interrupt0_Handler, /* 0 Interrupt 0 */ + Interrupt1_Handler, /* 1 Interrupt 1 */ + Interrupt2_Handler, /* 2 Interrupt 2 */ + Interrupt3_Handler, /* 3 Interrupt 3 */ + Interrupt4_Handler, /* 4 Interrupt 4 */ + Interrupt5_Handler, /* 5 Interrupt 5 */ + Interrupt6_Handler, /* 6 Interrupt 6 */ + Interrupt7_Handler, /* 7 Interrupt 7 */ + Interrupt8_Handler, /* 8 Interrupt 8 */ + Interrupt9_Handler /* 9 Interrupt 9 */ + /* Interrupts 10..31 are left out */ +}; + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic pop +#endif + +/*---------------------------------------------------------------------------- + Reset Handler called on controller reset + *----------------------------------------------------------------------------*/ +__NO_RETURN void Reset_Handler(void) +{ + SystemInit(); /* CMSIS System Initialization */ + __PROGRAM_START(); /* Enter PreMain (C library entry point) */ +} + + +#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wmissing-noreturn" 
+#endif + +/*---------------------------------------------------------------------------- + Hard Fault Handler + *----------------------------------------------------------------------------*/ +void HardFault_Handler(void) +{ + while(1); +} + +/*---------------------------------------------------------------------------- + Default Handler for Exceptions / Interrupts + *----------------------------------------------------------------------------*/ +void Default_Handler(void) +{ + while(1); +} + +#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) + #pragma clang diagnostic pop +#endif + diff --git a/dsppp/RTE/Device/ARMCM0P/startup_ARMCM0plus.c.base@3.0.0 b/dsppp/RTE/Device/ARMCM0P/startup_ARMCM0plus.c.base@3.0.0 new file mode 100644 index 00000000..25b20245 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/startup_ARMCM0plus.c.base@3.0.0 @@ -0,0 +1,146 @@ +/****************************************************************************** + * @file startup_ARMCM0plus.c + * @brief CMSIS-Core(M) Device Startup File for a Cortex-M0+ Device + * @version V3.0.0 + * @date 06. April 2023 + ******************************************************************************/ +/* + * Copyright (c) 2009-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined (ARMCM0P) + #include "ARMCM0plus.h" +#else + #error device not specified! 
+#endif + +/*---------------------------------------------------------------------------- + External References + *----------------------------------------------------------------------------*/ +extern uint32_t __INITIAL_SP; + +extern __NO_RETURN void __PROGRAM_START(void); + +/*---------------------------------------------------------------------------- + Internal References + *----------------------------------------------------------------------------*/ +__NO_RETURN void Reset_Handler (void); + void Default_Handler(void); + +/*---------------------------------------------------------------------------- + Exception / Interrupt Handler + *----------------------------------------------------------------------------*/ +/* Exceptions */ +void NMI_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void HardFault_Handler (void) __attribute__ ((weak)); +void SVC_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void PendSV_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void SysTick_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); + +void Interrupt0_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt1_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt2_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt3_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt4_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt5_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt6_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt7_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt8_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt9_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); + + 
+/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#endif + +extern const VECTOR_TABLE_Type __VECTOR_TABLE[48]; + const VECTOR_TABLE_Type __VECTOR_TABLE[48] __VECTOR_TABLE_ATTRIBUTE = { + (VECTOR_TABLE_Type)(&__INITIAL_SP), /* Initial Stack Pointer */ + Reset_Handler, /* Reset Handler */ + NMI_Handler, /* -14 NMI Handler */ + HardFault_Handler, /* -13 Hard Fault Handler */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + SVC_Handler, /* -5 SVCall Handler */ + 0, /* Reserved */ + 0, /* Reserved */ + PendSV_Handler, /* -2 PendSV Handler */ + SysTick_Handler, /* -1 SysTick Handler */ + + /* Interrupts */ + Interrupt0_Handler, /* 0 Interrupt 0 */ + Interrupt1_Handler, /* 1 Interrupt 1 */ + Interrupt2_Handler, /* 2 Interrupt 2 */ + Interrupt3_Handler, /* 3 Interrupt 3 */ + Interrupt4_Handler, /* 4 Interrupt 4 */ + Interrupt5_Handler, /* 5 Interrupt 5 */ + Interrupt6_Handler, /* 6 Interrupt 6 */ + Interrupt7_Handler, /* 7 Interrupt 7 */ + Interrupt8_Handler, /* 8 Interrupt 8 */ + Interrupt9_Handler /* 9 Interrupt 9 */ + /* Interrupts 10..31 are left out */ +}; + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic pop +#endif + +/*---------------------------------------------------------------------------- + Reset Handler called on controller reset + *----------------------------------------------------------------------------*/ +__NO_RETURN void Reset_Handler(void) +{ + SystemInit(); /* CMSIS System Initialization */ + __PROGRAM_START(); /* Enter PreMain (C library entry point) */ +} + + +#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wmissing-noreturn" 
+#endif + +/*---------------------------------------------------------------------------- + Hard Fault Handler + *----------------------------------------------------------------------------*/ +void HardFault_Handler(void) +{ + while(1); +} + +/*---------------------------------------------------------------------------- + Default Handler for Exceptions / Interrupts + *----------------------------------------------------------------------------*/ +void Default_Handler(void) +{ + while(1); +} + +#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) + #pragma clang diagnostic pop +#endif + diff --git a/dsppp/RTE/Device/ARMCM0P/system_ARMCM0plus.c b/dsppp/RTE/Device/ARMCM0P/system_ARMCM0plus.c new file mode 100644 index 00000000..164d16da --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/system_ARMCM0plus.c @@ -0,0 +1,69 @@ +/**************************************************************************//** + * @file system_ARMCM0plus.c + * @brief CMSIS Device System Source File for + * ARMCM0plus Device + * @version V2.0.0 + * @date 06. April 2023 + ******************************************************************************/ +/* + * Copyright (c) 2009-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined (ARMCM0P) + #include "ARMCM0plus.h" +#else + #error device not specified! 
+#endif + +/*---------------------------------------------------------------------------- + Define clocks + *----------------------------------------------------------------------------*/ +#define XTAL (50000000UL) /* Oscillator frequency */ + +#define SYSTEM_CLOCK (XTAL / 2U) + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ +extern const VECTOR_TABLE_Type __VECTOR_TABLE[48]; + +/*---------------------------------------------------------------------------- + System Core Clock Variable + *----------------------------------------------------------------------------*/ +uint32_t SystemCoreClock = SYSTEM_CLOCK; /* System Core Clock Frequency */ + + +/*---------------------------------------------------------------------------- + System Core Clock update function + *----------------------------------------------------------------------------*/ +void SystemCoreClockUpdate (void) +{ + SystemCoreClock = SYSTEM_CLOCK; +} + +/*---------------------------------------------------------------------------- + System initialization function + *----------------------------------------------------------------------------*/ +void SystemInit (void) +{ + +#if defined (__VTOR_PRESENT) && (__VTOR_PRESENT == 1U) + SCB->VTOR = (uint32_t) &(__VECTOR_TABLE[0]); +#endif + + SystemCoreClock = SYSTEM_CLOCK; +} diff --git a/dsppp/RTE/Device/ARMCM0P/system_ARMCM0plus.c.base@2.0.0 b/dsppp/RTE/Device/ARMCM0P/system_ARMCM0plus.c.base@2.0.0 new file mode 100644 index 00000000..164d16da --- /dev/null +++ b/dsppp/RTE/Device/ARMCM0P/system_ARMCM0plus.c.base@2.0.0 @@ -0,0 +1,69 @@ +/**************************************************************************//** + * @file system_ARMCM0plus.c + * @brief CMSIS Device System Source File for + * ARMCM0plus Device + * @version V2.0.0 + * @date 06. 
April 2023 + ******************************************************************************/ +/* + * Copyright (c) 2009-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined (ARMCM0P) + #include "ARMCM0plus.h" +#else + #error device not specified! +#endif + +/*---------------------------------------------------------------------------- + Define clocks + *----------------------------------------------------------------------------*/ +#define XTAL (50000000UL) /* Oscillator frequency */ + +#define SYSTEM_CLOCK (XTAL / 2U) + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ +extern const VECTOR_TABLE_Type __VECTOR_TABLE[48]; + +/*---------------------------------------------------------------------------- + System Core Clock Variable + *----------------------------------------------------------------------------*/ +uint32_t SystemCoreClock = SYSTEM_CLOCK; /* System Core Clock Frequency */ + + +/*---------------------------------------------------------------------------- + System Core Clock update function + *----------------------------------------------------------------------------*/ +void SystemCoreClockUpdate (void) +{ + SystemCoreClock = SYSTEM_CLOCK; +} + 
+/*---------------------------------------------------------------------------- + System initialization function + *----------------------------------------------------------------------------*/ +void SystemInit (void) +{ + +#if defined (__VTOR_PRESENT) && (__VTOR_PRESENT == 1U) + SCB->VTOR = (uint32_t) &(__VECTOR_TABLE[0]); +#endif + + SystemCoreClock = SYSTEM_CLOCK; +} diff --git a/dsppp/RTE/Device/ARMCM4/ARMCM4_ac6.sct b/dsppp/RTE/Device/ARMCM4/ARMCM4_ac6.sct new file mode 100644 index 00000000..eb67b5fe --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/ARMCM4_ac6.sct @@ -0,0 +1,80 @@ +#! armclang -E --target=arm-arm-none-eabi -mcpu=cortex-m4 -xc +; command above MUST be in first line (no comment above!) + +/* +;-------- <<< Use Configuration Wizard in Context Menu >>> ------------------- +*/ + +/*--------------------- Flash Configuration ---------------------------------- +; Flash Configuration +; Flash Base Address <0x0-0xFFFFFFFF:8> +; Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __ROM_BASE 0x00000000 +#define __ROM_SIZE 0x00080000 + +/*--------------------- Embedded RAM Configuration --------------------------- +; RAM Configuration +; RAM Base Address <0x0-0xFFFFFFFF:8> +; RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __RAM_BASE 0x20000000 +#define __RAM_SIZE 0x00040000 + +/*--------------------- Stack / Heap Configuration --------------------------- +; Stack / Heap Configuration +; Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +; Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __STACK_SIZE 0x00000200 +#define __HEAP_SIZE 0x00000C00 + +/* +;------------- <<< end of configuration section >>> --------------------------- +*/ + + +/*---------------------------------------------------------------------------- + User 
Stack & Heap boundary definition + *----------------------------------------------------------------------------*/ +#define __STACK_TOP (__RAM_BASE + __RAM_SIZE) /* starts at end of RAM */ +#define __HEAP_BASE (AlignExpr(+0, 8)) /* starts after RW_RAM section, 8 byte aligned */ + + +/*---------------------------------------------------------------------------- + Scatter File Definitions definition + *----------------------------------------------------------------------------*/ +#define __RO_BASE __ROM_BASE +#define __RO_SIZE __ROM_SIZE + +#define __RW_BASE __RAM_BASE +#define __RW_SIZE (__RAM_SIZE - __STACK_SIZE - __HEAP_SIZE) + + +LR_ROM __RO_BASE __RO_SIZE { ; load region size_region + ER_ROM __RO_BASE __RO_SIZE { ; load address = execution address + *.o (RESET, +First) + *(InRoot$$Sections) + .ANY (+RO) + .ANY (+XO) + } + + RW_NOINIT __RW_BASE UNINIT __RW_SIZE { + *(.bss.noinit) + } + + RW_RAM AlignExpr(+0, 8) (__RW_SIZE - AlignExpr(ImageLength(RW_NOINIT), 8)) { + *(+RW +ZI) + } + +#if __HEAP_SIZE > 0 + ARM_LIB_HEAP __HEAP_BASE EMPTY __HEAP_SIZE { ; Reserve empty region for heap + } +#endif + + ARM_LIB_STACK __STACK_TOP EMPTY -__STACK_SIZE { ; Reserve empty region for stack + } +} diff --git a/dsppp/RTE/Device/ARMCM4/ARMCM4_ac6.sct.base@1.0.0 b/dsppp/RTE/Device/ARMCM4/ARMCM4_ac6.sct.base@1.0.0 new file mode 100644 index 00000000..eb67b5fe --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/ARMCM4_ac6.sct.base@1.0.0 @@ -0,0 +1,80 @@ +#! armclang -E --target=arm-arm-none-eabi -mcpu=cortex-m4 -xc +; command above MUST be in first line (no comment above!) 
+ +/* +;-------- <<< Use Configuration Wizard in Context Menu >>> ------------------- +*/ + +/*--------------------- Flash Configuration ---------------------------------- +; Flash Configuration +; Flash Base Address <0x0-0xFFFFFFFF:8> +; Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __ROM_BASE 0x00000000 +#define __ROM_SIZE 0x00080000 + +/*--------------------- Embedded RAM Configuration --------------------------- +; RAM Configuration +; RAM Base Address <0x0-0xFFFFFFFF:8> +; RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __RAM_BASE 0x20000000 +#define __RAM_SIZE 0x00040000 + +/*--------------------- Stack / Heap Configuration --------------------------- +; Stack / Heap Configuration +; Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +; Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +; + *----------------------------------------------------------------------------*/ +#define __STACK_SIZE 0x00000200 +#define __HEAP_SIZE 0x00000C00 + +/* +;------------- <<< end of configuration section >>> --------------------------- +*/ + + +/*---------------------------------------------------------------------------- + User Stack & Heap boundary definition + *----------------------------------------------------------------------------*/ +#define __STACK_TOP (__RAM_BASE + __RAM_SIZE) /* starts at end of RAM */ +#define __HEAP_BASE (AlignExpr(+0, 8)) /* starts after RW_RAM section, 8 byte aligned */ + + +/*---------------------------------------------------------------------------- + Scatter File Definitions definition + *----------------------------------------------------------------------------*/ +#define __RO_BASE __ROM_BASE +#define __RO_SIZE __ROM_SIZE + +#define __RW_BASE __RAM_BASE +#define __RW_SIZE (__RAM_SIZE - __STACK_SIZE - __HEAP_SIZE) + + +LR_ROM __RO_BASE __RO_SIZE { ; load region size_region + ER_ROM 
__RO_BASE __RO_SIZE { ; load address = execution address + *.o (RESET, +First) + *(InRoot$$Sections) + .ANY (+RO) + .ANY (+XO) + } + + RW_NOINIT __RW_BASE UNINIT __RW_SIZE { + *(.bss.noinit) + } + + RW_RAM AlignExpr(+0, 8) (__RW_SIZE - AlignExpr(ImageLength(RW_NOINIT), 8)) { + *(+RW +ZI) + } + +#if __HEAP_SIZE > 0 + ARM_LIB_HEAP __HEAP_BASE EMPTY __HEAP_SIZE { ; Reserve empty region for heap + } +#endif + + ARM_LIB_STACK __STACK_TOP EMPTY -__STACK_SIZE { ; Reserve empty region for stack + } +} diff --git a/dsppp/RTE/Device/ARMCM4/ARMCM4_gcc.ld b/dsppp/RTE/Device/ARMCM4/ARMCM4_gcc.ld new file mode 100644 index 00000000..93ed813c --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/ARMCM4_gcc.ld @@ -0,0 +1,263 @@ +/* + *-------- <<< Use Configuration Wizard in Context Menu >>> ------------------- + */ + +/*---------------------- Flash Configuration ---------------------------------- + Flash Configuration + Flash Base Address <0x0-0xFFFFFFFF:8> + Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__ROM_BASE = 0x00000000; +__ROM_SIZE = 0x00040000; + +/*--------------------- Embedded RAM Configuration ---------------------------- + RAM Configuration + RAM Base Address <0x0-0xFFFFFFFF:8> + RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__RAM_BASE = 0x20000000; +__RAM_SIZE = 0x00020000; + +/*--------------------- Stack / Heap Configuration ---------------------------- + Stack / Heap Configuration + Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> + Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__STACK_SIZE = 0x00000400; +__HEAP_SIZE = 0x00000C00; + +/* + *-------------------- <<< end of configuration section >>> ------------------- + */ + +MEMORY +{ + FLASH (rx) : ORIGIN = __ROM_BASE, LENGTH = __ROM_SIZE + RAM (rwx) : ORIGIN = __RAM_BASE, LENGTH = __RAM_SIZE +} 
+ +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. + * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > FLASH + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > FLASH + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > FLASH + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > FLASH + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. 
+ LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > FLASH + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM AT > FLASH + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM2 AT > FLASH +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM AT > RAM + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM2 AT > RAM2 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . 
= ALIGN(8); + __HeapLimit = .; + } > RAM + + .stack (ORIGIN(RAM) + LENGTH(RAM) - __STACK_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM + PROVIDE(__stack = __StackTop); + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/RTE/Device/ARMCM4/ARMCM4_gcc.ld.base@2.2.0 b/dsppp/RTE/Device/ARMCM4/ARMCM4_gcc.ld.base@2.2.0 new file mode 100644 index 00000000..93ed813c --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/ARMCM4_gcc.ld.base@2.2.0 @@ -0,0 +1,263 @@ +/* + *-------- <<< Use Configuration Wizard in Context Menu >>> ------------------- + */ + +/*---------------------- Flash Configuration ---------------------------------- + Flash Configuration + Flash Base Address <0x0-0xFFFFFFFF:8> + Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__ROM_BASE = 0x00000000; +__ROM_SIZE = 0x00040000; + +/*--------------------- Embedded RAM Configuration ---------------------------- + RAM Configuration + RAM Base Address <0x0-0xFFFFFFFF:8> + RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__RAM_BASE = 0x20000000; +__RAM_SIZE = 0x00020000; + +/*--------------------- Stack / Heap Configuration ---------------------------- + Stack / Heap Configuration + Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> + Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__STACK_SIZE = 0x00000400; +__HEAP_SIZE = 0x00000C00; + +/* + *-------------------- <<< end of configuration section >>> ------------------- + */ + +MEMORY +{ + FLASH (rx) : ORIGIN = __ROM_BASE, LENGTH = __ROM_SIZE + RAM (rwx) : ORIGIN = __RAM_BASE, LENGTH = __RAM_SIZE +} + +/* Linker script to place sections and symbol values. 
Should be used together + * with other linker script that defines memory regions FLASH and RAM. + * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > FLASH + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > FLASH + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > FLASH + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > FLASH + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. 
+ LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > FLASH + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM AT > FLASH + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM2 AT > FLASH +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM AT > RAM + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM2 AT > RAM2 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . 
= ALIGN(8); + __HeapLimit = .; + } > RAM + + .stack (ORIGIN(RAM) + LENGTH(RAM) - __STACK_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM + PROVIDE(__stack = __StackTop); + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/RTE/Device/ARMCM4/clang_linker_script.ld b/dsppp/RTE/Device/ARMCM4/clang_linker_script.ld new file mode 100644 index 00000000..40f955c1 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/clang_linker_script.ld @@ -0,0 +1,353 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright © 2019 Keith Packard + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx!w) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx!w) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx!w) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx!w) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (w!rx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (w!rx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (w!rx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (w!rx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +ENTRY(Reset_Handler) + +PHDRS +{ + text PT_LOAD; + ram PT_LOAD; + ram_init PT_LOAD; + tls PT_TLS; +} + +SECTIONS +{ + .init : { + KEEP (*(.vectors)) + KEEP (*(.text.init.enter)) + KEEP (*(.data.init.enter)) + KEEP (*(SORT_BY_NAME(.init) SORT_BY_NAME(.init.*))) 
+ } >ROM0 AT>ROM0 :text + + .text : { + + /* code */ + *(.text.unlikely .text.unlikely.*) + *(.text.startup .text.startup.*) + *(.text .text.* .opd .opd.*) + *(.gnu.linkonce.t.*) + KEEP (*(.fini .fini.*)) + __text_end = .; + + PROVIDE (__etext = __text_end); + PROVIDE (_etext = __text_end); + PROVIDE (etext = __text_end); + + /* read-only data */ + *(.rdata) + *(.rodata .rodata.*) + *(.gnu.linkonce.r.*) + + *(.srodata.cst16) + *(.srodata.cst8) + *(.srodata.cst4) + *(.srodata.cst2) + *(.srodata .srodata.*) + *(.data.rel.ro .data.rel.ro.*) + *(.got .got.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + /* lists of constructors and destructors */ + PROVIDE_HIDDEN ( __preinit_array_start = . ); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN ( __preinit_array_end = . ); + + PROVIDE_HIDDEN ( __init_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) + KEEP (*(.init_array .ctors)) + PROVIDE_HIDDEN ( __init_array_end = . ); + + PROVIDE_HIDDEN ( __fini_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) + KEEP (*(.fini_array .dtors)) + PROVIDE_HIDDEN ( __fini_array_end = . ); + + } >ROM0 AT>ROM0 :text + + .toc : { + *(.toc .toc.*) + } >ROM0 AT>ROM0 :text + + /* additional sections when compiling with C++ exception support */ + + .except_ordered : { + *(.gcc_except_table *.gcc_except_table.*) + KEEP (*(.eh_frame .eh_frame.*)) + *(.ARM.extab* .gnu.linkonce.armextab.*) + } >ROM0 AT>ROM0 :text + + .except_unordered : { + . 
= ALIGN(8); + + PROVIDE(__exidx_start = .); + *(.ARM.exidx*) + PROVIDE(__exidx_end = .); + } >ROM0 AT>ROM0 :text + + + /* + * Data values which are preserved across reset + */ + .preserve (NOLOAD) : { + PROVIDE(__preserve_start__ = .); + KEEP(*(SORT_BY_NAME(.preserve.*))) + KEEP(*(.preserve)) + PROVIDE(__preserve_end__ = .); + } >RAM0 AT>RAM0 :ram + + .data : { + *(.data .data.*) + *(.gnu.linkonce.d.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + PROVIDE( __global_pointer$ = . + 0x800 ); + *(.sdata .sdata.* .sdata2.*) + *(.gnu.linkonce.s.*) + } >RAM0 AT>ROM0 :ram_init + PROVIDE(__data_start = ADDR(.data)); + PROVIDE(__data_source = LOADADDR(.data)); + + /* Thread local initialized data. This gets + * space allocated as it is expected to be placed + * in ram to be used as a template for TLS data blocks + * allocated at runtime. We're slightly abusing that + * by placing the data in flash where it will be copied + * into the allocate ram addresses by the existing + * data initialization code in crt0 + */ + .tdata : { + *(.tdata .tdata.* .gnu.linkonce.td.*) + PROVIDE(__data_end = .); + PROVIDE(__tdata_end = .); + } >RAM0 AT>ROM0 :tls :ram_init + PROVIDE( __tls_base = ADDR(.tdata)); + PROVIDE( __tdata_start = ADDR(.tdata)); + PROVIDE( __tdata_source = LOADADDR(.tdata) ); + PROVIDE( __tdata_source_end = LOADADDR(.tdata) + SIZEOF(.tdata) ); + PROVIDE( __data_source_end = __tdata_source_end ); + PROVIDE( __tdata_size = SIZEOF(.tdata) ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata),ALIGNOF(.tbss)) ); + + PROVIDE( __edata = __data_end ); + PROVIDE( _edata = __data_end ); + PROVIDE( edata = __data_end ); + PROVIDE( __data_size = __data_end - __data_start ); + PROVIDE( __data_source_size = __data_source_end - __data_source ); + + .tbss (NOLOAD) : { + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + PROVIDE( __tls_end = . ); + PROVIDE( __tbss_end = . 
); + } >RAM0 AT>RAM0 :tls :ram + PROVIDE( __bss_start = ADDR(.tbss)); + PROVIDE( __tbss_start = ADDR(.tbss)); + PROVIDE( __tbss_offset = ADDR(.tbss) - ADDR(.tdata) ); + PROVIDE( __tbss_size = SIZEOF(.tbss) ); + PROVIDE( __tls_size = __tls_end - __tls_base ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + PROVIDE( __arm64_tls_tcb_offset = MAX(16, __tls_align) ); + + /* + * The linker special cases .tbss segments which are + * identified as segments which are not loaded and are + * thread_local. + * + * For these segments, the linker does not advance 'dot' + * across them. We actually need memory allocated for tbss, + * so we create a special segment here just to make room + */ + /* + .tbss_space (NOLOAD) : { + . = ADDR(.tbss); + . = . + SIZEOF(.tbss); + } >RAM0 AT>RAM0 :ram + */ + + .bss (NOLOAD) : { + *(.sbss*) + *(.gnu.linkonce.sb.*) + *(.bss .bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + + /* Align the heap */ + . = ALIGN(8); + __bss_end = .; + } >RAM0 AT>RAM0 :ram + PROVIDE( __non_tls_bss_start = ADDR(.bss) ); + PROVIDE( __end = __bss_end ); + PROVIDE( _end = __bss_end ); + PROVIDE( end = __bss_end ); + PROVIDE( __bss_size = __bss_end - __bss_start ); + + /* Make the rest of memory available for heap storage */ + PROVIDE (__heap_start = __end); +#ifdef __HEAP_SIZE + PROVIDE (__heap_end = __heap_start + __HEAP_SIZE); + PROVIDE (__heap_size = __HEAP_SIZE); +#else + PROVIDE (__heap_end = __stack - __STACK_SIZE); + PROVIDE (__heap_size = __heap_end - __heap_start); +#endif + .heap (NOLOAD) : { + . += __heap_size; + } >RAM0 :ram + + /* Define a stack region to make sure it fits in memory */ + PROVIDE(__stack = ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE); + PROVIDE(__stack_limit = __stack - __STACK_SIZE); + .stack (__stack_limit) (NOLOAD) : { + . += __STACK_SIZE; + } >RAM0 :ram + +#if __STACKSEAL_SIZE > 0 + PROVIDE(__stack_seal = __stack) + .stackseal (__stack) (NOLOAD) : + { + . 
+= __STACKSEAL_SIZE; + } >RAM0 :ram +#endif + + /* Throw away C++ exception handling information */ + + /* + + /DISCARD/ : { + *(.note .note.*) + *(.eh_frame .eh_frame.*) + *(.ARM.extab* .gnu.linkonce.armextab.*) + *(.ARM.exidx*) + } + + */ + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1. */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions. */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2. */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2. */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions. */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3. */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + /* DWARF 5. 
*/ + .debug_addr 0 : { *(.debug_addr) } + .debug_line_str 0 : { *(.debug_line_str) } + .debug_loclists 0 : { *(.debug_loclists) } + .debug_macro 0 : { *(.debug_macro) } + .debug_names 0 : { *(.debug_names) } + .debug_rnglists 0 : { *(.debug_rnglists) } + .debug_str_offsets 0 : { *(.debug_str_offsets) } + .debug_sup 0 : { *(.debug_sup) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } +} +/* + * Check that sections that are copied from flash to RAM have matching + * padding, so that a single memcpy() of __data_size copies the correct bytes. + */ +ASSERT( __data_size == __data_source_size, + "ERROR: .data/.tdata flash size does not match RAM size"); diff --git a/dsppp/RTE/Device/ARMCM4/regions_ARMCM4.h b/dsppp/RTE/Device/ARMCM4/regions_ARMCM4.h new file mode 100644 index 00000000..3ee4d422 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/regions_ARMCM4.h @@ -0,0 +1,60 @@ +#ifndef REGIONS_ARMCM4_H +#define REGIONS_ARMCM4_H + + +//-------- <<< Use Configuration Wizard in Context Menu >>> -------------------- + +// Device pack: ARM::Cortex_DFP@1.0.0 +// Device pack used to generate this file + +// ROM Configuration +// ======================= +// ROM=<__ROM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x00000000 +#define __ROM0_BASE 0x00000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00040000 +#define __ROM0_SIZE 0x00040000 +// Default region +// Enables memory region globally for the application. +#define __ROM0_DEFAULT 1 +// Startup +// Selects region to be used for startup code. +#define __ROM0_STARTUP 1 +// + +// + +// RAM Configuration +// ======================= +// RAM=<__RAM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x20000000 +#define __RAM0_BASE 0x20000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. 
+// Default: 0x00020000 +#define __RAM0_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM0_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM0_NOINIT 0 +// + +// + +// Stack / Heap Configuration +// Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +// Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +#define __STACK_SIZE 0x00000200 +#define __HEAP_SIZE 0x00000C00 +// + + +#endif /* REGIONS_ARMCM4_H */ diff --git a/dsppp/RTE/Device/ARMCM4/startup_ARMCM4.c b/dsppp/RTE/Device/ARMCM4/startup_ARMCM4.c new file mode 100644 index 00000000..9d577736 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/startup_ARMCM4.c @@ -0,0 +1,150 @@ +/****************************************************************************** + * @file startup_ARMCM4.c + * @brief CMSIS-Core(M) Device Startup File for a Cortex-M4 Device + * @version V3.0.0 + * @date 06. April 2023 + ******************************************************************************/ +/* + * Copyright (c) 2009-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined (ARMCM4) + #include "ARMCM4.h" +#else + #error device not specified! 
+#endif + +/*---------------------------------------------------------------------------- + External References + *----------------------------------------------------------------------------*/ +extern uint32_t __INITIAL_SP; + +extern __NO_RETURN void __PROGRAM_START(void); + +/*---------------------------------------------------------------------------- + Internal References + *----------------------------------------------------------------------------*/ +__NO_RETURN void Reset_Handler (void); + void Default_Handler(void); + +/*---------------------------------------------------------------------------- + Exception / Interrupt Handler + *----------------------------------------------------------------------------*/ +/* Exceptions */ +void NMI_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void HardFault_Handler (void) __attribute__ ((weak)); +void MemManage_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void BusFault_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void UsageFault_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void SVC_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void DebugMon_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void PendSV_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void SysTick_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); + +void Interrupt0_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt1_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt2_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt3_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt4_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt5_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt6_Handler (void) __attribute__ ((weak, 
alias("Default_Handler"))); +void Interrupt7_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt8_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt9_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); + + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#endif + +extern const VECTOR_TABLE_Type __VECTOR_TABLE[240]; + const VECTOR_TABLE_Type __VECTOR_TABLE[240] __VECTOR_TABLE_ATTRIBUTE = { + (VECTOR_TABLE_Type)(&__INITIAL_SP), /* Initial Stack Pointer */ + Reset_Handler, /* Reset Handler */ + NMI_Handler, /* -14 NMI Handler */ + HardFault_Handler, /* -13 Hard Fault Handler */ + MemManage_Handler, /* -12 MPU Fault Handler */ + BusFault_Handler, /* -11 Bus Fault Handler */ + UsageFault_Handler, /* -10 Usage Fault Handler */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + SVC_Handler, /* -5 SVC Handler */ + DebugMon_Handler, /* -4 Debug Monitor Handler */ + 0, /* Reserved */ + PendSV_Handler, /* -2 PendSV Handler */ + SysTick_Handler, /* -1 SysTick Handler */ + + /* Interrupts */ + Interrupt0_Handler, /* 0 Interrupt 0 */ + Interrupt1_Handler, /* 1 Interrupt 1 */ + Interrupt2_Handler, /* 2 Interrupt 2 */ + Interrupt3_Handler, /* 3 Interrupt 3 */ + Interrupt4_Handler, /* 4 Interrupt 4 */ + Interrupt5_Handler, /* 5 Interrupt 5 */ + Interrupt6_Handler, /* 6 Interrupt 6 */ + Interrupt7_Handler, /* 7 Interrupt 7 */ + Interrupt8_Handler, /* 8 Interrupt 8 */ + Interrupt9_Handler /* 9 Interrupt 9 */ + /* Interrupts 10 .. 
223 are left out */ +}; + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic pop +#endif + +/*---------------------------------------------------------------------------- + Reset Handler called on controller reset + *----------------------------------------------------------------------------*/ +__NO_RETURN void Reset_Handler(void) +{ + SystemInit(); /* CMSIS System Initialization */ + __PROGRAM_START(); /* Enter PreMain (C library entry point) */ +} + + +#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wmissing-noreturn" +#endif + +/*---------------------------------------------------------------------------- + Hard Fault Handler + *----------------------------------------------------------------------------*/ +void HardFault_Handler(void) +{ + while(1); +} + +/*---------------------------------------------------------------------------- + Default Handler for Exceptions / Interrupts + *----------------------------------------------------------------------------*/ +void Default_Handler(void) +{ + while(1); +} + +#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) + #pragma clang diagnostic pop +#endif + diff --git a/dsppp/RTE/Device/ARMCM4/startup_ARMCM4.c.base@3.0.0 b/dsppp/RTE/Device/ARMCM4/startup_ARMCM4.c.base@3.0.0 new file mode 100644 index 00000000..9d577736 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/startup_ARMCM4.c.base@3.0.0 @@ -0,0 +1,150 @@ +/****************************************************************************** + * @file startup_ARMCM4.c + * @brief CMSIS-Core(M) Device Startup File for a Cortex-M4 Device + * @version V3.0.0 + * @date 06. April 2023 + ******************************************************************************/ +/* + * Copyright (c) 2009-2023 Arm Limited. All rights reserved. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined (ARMCM4) + #include "ARMCM4.h" +#else + #error device not specified! +#endif + +/*---------------------------------------------------------------------------- + External References + *----------------------------------------------------------------------------*/ +extern uint32_t __INITIAL_SP; + +extern __NO_RETURN void __PROGRAM_START(void); + +/*---------------------------------------------------------------------------- + Internal References + *----------------------------------------------------------------------------*/ +__NO_RETURN void Reset_Handler (void); + void Default_Handler(void); + +/*---------------------------------------------------------------------------- + Exception / Interrupt Handler + *----------------------------------------------------------------------------*/ +/* Exceptions */ +void NMI_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void HardFault_Handler (void) __attribute__ ((weak)); +void MemManage_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void BusFault_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void UsageFault_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void SVC_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void DebugMon_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void PendSV_Handler (void) 
__attribute__ ((weak, alias("Default_Handler"))); +void SysTick_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); + +void Interrupt0_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt1_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt2_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt3_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt4_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt5_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt6_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt7_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt8_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); +void Interrupt9_Handler (void) __attribute__ ((weak, alias("Default_Handler"))); + + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#endif + +extern const VECTOR_TABLE_Type __VECTOR_TABLE[240]; + const VECTOR_TABLE_Type __VECTOR_TABLE[240] __VECTOR_TABLE_ATTRIBUTE = { + (VECTOR_TABLE_Type)(&__INITIAL_SP), /* Initial Stack Pointer */ + Reset_Handler, /* Reset Handler */ + NMI_Handler, /* -14 NMI Handler */ + HardFault_Handler, /* -13 Hard Fault Handler */ + MemManage_Handler, /* -12 MPU Fault Handler */ + BusFault_Handler, /* -11 Bus Fault Handler */ + UsageFault_Handler, /* -10 Usage Fault Handler */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + SVC_Handler, /* -5 SVC Handler */ + DebugMon_Handler, /* -4 Debug Monitor Handler */ + 0, /* Reserved */ + PendSV_Handler, /* -2 PendSV Handler */ + SysTick_Handler, /* -1 SysTick 
Handler */ + + /* Interrupts */ + Interrupt0_Handler, /* 0 Interrupt 0 */ + Interrupt1_Handler, /* 1 Interrupt 1 */ + Interrupt2_Handler, /* 2 Interrupt 2 */ + Interrupt3_Handler, /* 3 Interrupt 3 */ + Interrupt4_Handler, /* 4 Interrupt 4 */ + Interrupt5_Handler, /* 5 Interrupt 5 */ + Interrupt6_Handler, /* 6 Interrupt 6 */ + Interrupt7_Handler, /* 7 Interrupt 7 */ + Interrupt8_Handler, /* 8 Interrupt 8 */ + Interrupt9_Handler /* 9 Interrupt 9 */ + /* Interrupts 10 .. 223 are left out */ +}; + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic pop +#endif + +/*---------------------------------------------------------------------------- + Reset Handler called on controller reset + *----------------------------------------------------------------------------*/ +__NO_RETURN void Reset_Handler(void) +{ + SystemInit(); /* CMSIS System Initialization */ + __PROGRAM_START(); /* Enter PreMain (C library entry point) */ +} + + +#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wmissing-noreturn" +#endif + +/*---------------------------------------------------------------------------- + Hard Fault Handler + *----------------------------------------------------------------------------*/ +void HardFault_Handler(void) +{ + while(1); +} + +/*---------------------------------------------------------------------------- + Default Handler for Exceptions / Interrupts + *----------------------------------------------------------------------------*/ +void Default_Handler(void) +{ + while(1); +} + +#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) + #pragma clang diagnostic pop +#endif + diff --git a/dsppp/RTE/Device/ARMCM4/system_ARMCM4.c b/dsppp/RTE/Device/ARMCM4/system_ARMCM4.c new file mode 100644 index 00000000..803d4fc3 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/system_ARMCM4.c @@ -0,0 +1,79 @@ +/**************************************************************************//** + * @file 
system_ARMCM4.c + * @brief CMSIS Device System Source File for + * ARMCM4 Device + * @version V2.0.0 + * @date 06. April 2023 + ******************************************************************************/ +/* + * Copyright (c) 2009-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined (ARMCM4) + #include "ARMCM4.h" +#else + #error device not specified! +#endif + +/*---------------------------------------------------------------------------- + Define clocks + *----------------------------------------------------------------------------*/ +#define XTAL (50000000UL) /* Oscillator frequency */ + +#define SYSTEM_CLOCK (XTAL / 2U) + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ +extern const VECTOR_TABLE_Type __VECTOR_TABLE[240]; + + +/*---------------------------------------------------------------------------- + System Core Clock Variable + *----------------------------------------------------------------------------*/ +uint32_t SystemCoreClock = SYSTEM_CLOCK; /* System Core Clock Frequency */ + + +/*---------------------------------------------------------------------------- + System Core Clock update function + *----------------------------------------------------------------------------*/ +void SystemCoreClockUpdate 
(void) +{ + SystemCoreClock = SYSTEM_CLOCK; +} + +/*---------------------------------------------------------------------------- + System initialization function + *----------------------------------------------------------------------------*/ +void SystemInit (void) +{ + +#if defined (__VTOR_PRESENT) && (__VTOR_PRESENT == 1U) + SCB->VTOR = (uint32_t) &(__VECTOR_TABLE[0]); +#endif + +#if defined (__FPU_USED) && (__FPU_USED == 1U) + SCB->CPACR |= ((3U << 10U*2U) | /* enable CP10 Full Access */ + (3U << 11U*2U) ); /* enable CP11 Full Access */ +#endif + +#ifdef UNALIGNED_SUPPORT_DISABLE + SCB->CCR |= SCB_CCR_UNALIGN_TRP_Msk; +#endif + + SystemCoreClock = SYSTEM_CLOCK; +} diff --git a/dsppp/RTE/Device/ARMCM4/system_ARMCM4.c.base@2.0.0 b/dsppp/RTE/Device/ARMCM4/system_ARMCM4.c.base@2.0.0 new file mode 100644 index 00000000..803d4fc3 --- /dev/null +++ b/dsppp/RTE/Device/ARMCM4/system_ARMCM4.c.base@2.0.0 @@ -0,0 +1,79 @@ +/**************************************************************************//** + * @file system_ARMCM4.c + * @brief CMSIS Device System Source File for + * ARMCM4 Device + * @version V2.0.0 + * @date 06. April 2023 + ******************************************************************************/ +/* + * Copyright (c) 2009-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined (ARMCM4) + #include "ARMCM4.h" +#else + #error device not specified! 
+#endif + +/*---------------------------------------------------------------------------- + Define clocks + *----------------------------------------------------------------------------*/ +#define XTAL (50000000UL) /* Oscillator frequency */ + +#define SYSTEM_CLOCK (XTAL / 2U) + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ +extern const VECTOR_TABLE_Type __VECTOR_TABLE[240]; + + +/*---------------------------------------------------------------------------- + System Core Clock Variable + *----------------------------------------------------------------------------*/ +uint32_t SystemCoreClock = SYSTEM_CLOCK; /* System Core Clock Frequency */ + + +/*---------------------------------------------------------------------------- + System Core Clock update function + *----------------------------------------------------------------------------*/ +void SystemCoreClockUpdate (void) +{ + SystemCoreClock = SYSTEM_CLOCK; +} + +/*---------------------------------------------------------------------------- + System initialization function + *----------------------------------------------------------------------------*/ +void SystemInit (void) +{ + +#if defined (__VTOR_PRESENT) && (__VTOR_PRESENT == 1U) + SCB->VTOR = (uint32_t) &(__VECTOR_TABLE[0]); +#endif + +#if defined (__FPU_USED) && (__FPU_USED == 1U) + SCB->CPACR |= ((3U << 10U*2U) | /* enable CP10 Full Access */ + (3U << 11U*2U) ); /* enable CP11 Full Access */ +#endif + +#ifdef UNALIGNED_SUPPORT_DISABLE + SCB->CCR |= SCB_CCR_UNALIGN_TRP_Msk; +#endif + + SystemCoreClock = SYSTEM_CLOCK; +} diff --git a/dsppp/RTE/Device/SSE-300-MPS3/RTE_Device.h b/dsppp/RTE/Device/SSE-300-MPS3/RTE_Device.h new file mode 100644 index 00000000..31255472 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/RTE_Device.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019-2022 Arm Limited. 
All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __RTE_DEVICE_H +#define __RTE_DEVICE_H + +// USART (Universal synchronous - asynchronous receiver transmitter) [Driver_USART0] +// Configuration settings for Driver_USART0 in component ::Drivers:USART +#define RTE_USART0 1 + +// USART (Universal synchronous - asynchronous receiver transmitter) [Driver_USART1] +// Configuration settings for Driver_USART1 in component ::Drivers:USART +#define RTE_USART1 1 + +// MPC (Memory Protection Controller) [Driver_ISRAM0_MPC] +// Configuration settings for Driver_ISRAM0_MPC in component ::Drivers:MPC +#define RTE_ISRAM0_MPC 0 + +// MPC (Memory Protection Controller) [Driver_ISRAM1_MPC] +// Configuration settings for Driver_ISRAM1_MPC in component ::Drivers:MPC +#define RTE_ISRAM1_MPC 0 + +// MPC (Memory Protection Controller) [Driver_SRAM_MPC] +// Configuration settings for Driver_SRAM_MPC in component ::Drivers:MPC +#define RTE_SRAM_MPC 0 + +// MPC (Memory Protection Controller) [Driver_QSPI_MPC] +// Configuration settings for Driver_QSPI_MPC in component ::Drivers:MPC +#define RTE_QSPI_MPC 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_MAIN0] +// Configuration settings for Driver_PPC_SSE300_MAIN0 in component ::Drivers:PPC +#define RTE_PPC_SSE300_MAIN0 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_MAIN_EXP0] +// Configuration settings for Driver_PPC_SSE300_MAIN_EXP0 in component ::Drivers:PPC +#define 
RTE_PPC_SSE300_MAIN_EXP0 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_MAIN_EXP1] +// Configuration settings for Driver_PPC_SSE300_MAIN_EXP1 in component ::Drivers:PPC +#define RTE_PPC_SSE300_MAIN_EXP1 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH0] +// Configuration settings for Driver_PPC_SSE300_PERIPH0 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH0 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH1] +// Configuration settings for Driver_PPC_SSE300_PERIPH1 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH1 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH_EXP0] +// Configuration settings for Driver_PPC_SSE300_PERIPH_EXP0 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH_EXP0 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH_EXP1] +// Configuration settings for Driver_PPC_SSE300_PERIPH_EXP1 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH_EXP1 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH_EXP2] +// Configuration settings for Driver_PPC_SSE300_PERIPH_EXP2 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH_EXP2 0 + +// Flash device emulated by SRAM [Driver_Flash0] +// Configuration settings for Driver_Flash0 in component ::Drivers:Flash +#define RTE_FLASH0 1 + +// I2C SBCon [Driver_I2C0] +// Configuration settings for Driver_I2C0 in component ::Drivers:I2C +#define RTE_I2C0 1 + +#endif /* __RTE_DEVICE_H */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/RTE_Device.h.base@1.1.0 b/dsppp/RTE/Device/SSE-300-MPS3/RTE_Device.h.base@1.1.0 new file mode 100644 index 00000000..31255472 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/RTE_Device.h.base@1.1.0 @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019-2022 Arm Limited. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __RTE_DEVICE_H +#define __RTE_DEVICE_H + +// USART (Universal synchronous - asynchronous receiver transmitter) [Driver_USART0] +// Configuration settings for Driver_USART0 in component ::Drivers:USART +#define RTE_USART0 1 + +// USART (Universal synchronous - asynchronous receiver transmitter) [Driver_USART1] +// Configuration settings for Driver_USART1 in component ::Drivers:USART +#define RTE_USART1 1 + +// MPC (Memory Protection Controller) [Driver_ISRAM0_MPC] +// Configuration settings for Driver_ISRAM0_MPC in component ::Drivers:MPC +#define RTE_ISRAM0_MPC 0 + +// MPC (Memory Protection Controller) [Driver_ISRAM1_MPC] +// Configuration settings for Driver_ISRAM1_MPC in component ::Drivers:MPC +#define RTE_ISRAM1_MPC 0 + +// MPC (Memory Protection Controller) [Driver_SRAM_MPC] +// Configuration settings for Driver_SRAM_MPC in component ::Drivers:MPC +#define RTE_SRAM_MPC 0 + +// MPC (Memory Protection Controller) [Driver_QSPI_MPC] +// Configuration settings for Driver_QSPI_MPC in component ::Drivers:MPC +#define RTE_QSPI_MPC 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_MAIN0] +// Configuration settings for Driver_PPC_SSE300_MAIN0 in component ::Drivers:PPC +#define RTE_PPC_SSE300_MAIN0 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_MAIN_EXP0] +// Configuration settings for Driver_PPC_SSE300_MAIN_EXP0 in component ::Drivers:PPC +#define RTE_PPC_SSE300_MAIN_EXP0 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_MAIN_EXP1] +// Configuration settings for Driver_PPC_SSE300_MAIN_EXP1 in component 
::Drivers:PPC +#define RTE_PPC_SSE300_MAIN_EXP1 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH0] +// Configuration settings for Driver_PPC_SSE300_PERIPH0 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH0 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH1] +// Configuration settings for Driver_PPC_SSE300_PERIPH1 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH1 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH_EXP0] +// Configuration settings for Driver_PPC_SSE300_PERIPH_EXP0 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH_EXP0 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH_EXP1] +// Configuration settings for Driver_PPC_SSE300_PERIPH_EXP1 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH_EXP1 0 + +// PPC (Peripheral Protection Controller) [PPC_SSE300_PERIPH_EXP2] +// Configuration settings for Driver_PPC_SSE300_PERIPH_EXP2 in component ::Drivers:PPC +#define RTE_PPC_SSE300_PERIPH_EXP2 0 + +// Flash device emulated by SRAM [Driver_Flash0] +// Configuration settings for Driver_Flash0 in component ::Drivers:Flash +#define RTE_FLASH0 1 + +// I2C SBCon [Driver_I2C0] +// Configuration settings for Driver_I2C0 in component ::Drivers:I2C +#define RTE_I2C0 1 + +#endif /* __RTE_DEVICE_H */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/cmsis_driver_config.h b/dsppp/RTE/Device/SSE-300-MPS3/cmsis_driver_config.h new file mode 100644 index 00000000..bfc348f4 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/cmsis_driver_config.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2019-2022 Arm Limited. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CMSIS_DRIVER_CONFIG_H__ +#define __CMSIS_DRIVER_CONFIG_H__ + +#include "system_SSE300MPS3.h" +#include "device_cfg.h" +#include "device_definition.h" +#include "platform_base_address.h" + +#endif /* __CMSIS_DRIVER_CONFIG_H__ */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/cmsis_driver_config.h.base@1.1.1 b/dsppp/RTE/Device/SSE-300-MPS3/cmsis_driver_config.h.base@1.1.1 new file mode 100644 index 00000000..bfc348f4 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/cmsis_driver_config.h.base@1.1.1 @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2019-2022 Arm Limited. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __CMSIS_DRIVER_CONFIG_H__ +#define __CMSIS_DRIVER_CONFIG_H__ + +#include "system_SSE300MPS3.h" +#include "device_cfg.h" +#include "device_definition.h" +#include "platform_base_address.h" + +#endif /* __CMSIS_DRIVER_CONFIG_H__ */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/device_cfg.h b/dsppp/RTE/Device/SSE-300-MPS3/device_cfg.h new file mode 100644 index 00000000..2ff3eaa7 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/device_cfg.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2020-2022 Arm Limited. All rights reserved. + * + * Licensed under the Apache License Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __DEVICE_CFG_H__ +#define __DEVICE_CFG_H__ + +/** + * \file device_cfg.h + * \brief Configuration file native driver re-targeting + * + * \details This file can be used to add native driver specific macro + * definitions to select which peripherals are available in the build. + * + * This is a default device configuration file with all peripherals enabled. 
+ */ + +/* Secure only peripheral configuration */ + +/* ARM MPS3 IO SCC */ +#define MPS3_IO_S +#define MPS3_IO_DEV MPS3_IO_DEV_S + +/* I2C_SBCon */ +#define I2C0_SBCON_S +#define I2C0_SBCON_DEV I2C0_SBCON_DEV_S + +/* I2S */ +#define MPS3_I2S_S +#define MPS3_I2S_DEV MPS3_I2S_DEV_S + +/* ARM UART Controller PL011 */ +#define UART0_CMSDK_S +#define UART0_CMSDK_DEV UART0_CMSDK_DEV_S +#define UART1_CMSDK_S +#define UART1_CMSDK_DEV UART1_CMSDK_DEV_S + +#define DEFAULT_UART_BAUDRATE 115200U + +/* To be used as CODE and DATA sram */ +#define MPC_ISRAM0_S +#define MPC_ISRAM0_DEV MPC_ISRAM0_DEV_S + +#define MPC_ISRAM1_S +#define MPC_ISRAM1_DEV MPC_ISRAM0_DEV_S + +#define MPC_SRAM_S +#define MPC_SRAM_DEV MPC_SRAM_DEV_S + +#define MPC_QSPI_S +#define MPC_QSPI_DEV MPC_QSPI_DEV_S + +/** System Counter Armv8-M */ +#define SYSCOUNTER_CNTRL_ARMV8_M_S +#define SYSCOUNTER_CNTRL_ARMV8_M_DEV SYSCOUNTER_CNTRL_ARMV8_M_DEV_S + +#define SYSCOUNTER_READ_ARMV8_M_S +#define SYSCOUNTER_READ_ARMV8_M_DEV SYSCOUNTER_READ_ARMV8_M_DEV_S +/** + * Arbitrary scaling values for test purposes + */ +#define SYSCOUNTER_ARMV8_M_DEFAULT_SCALE0_INT 1u +#define SYSCOUNTER_ARMV8_M_DEFAULT_SCALE0_FRACT 0u +#define SYSCOUNTER_ARMV8_M_DEFAULT_SCALE1_INT 1u +#define SYSCOUNTER_ARMV8_M_DEFAULT_SCALE1_FRACT 0u + +/* System timer */ +#define SYSTIMER0_ARMV8_M_S +#define SYSTIMER0_ARMV8_M_DEV SYSTIMER0_ARMV8_M_DEV_S +#define SYSTIMER1_ARMV8_M_S +#define SYSTIMER1_ARMV8_M_DEV SYSTIMER1_ARMV8_M_DEV_S +#define SYSTIMER2_ARMV8_M_S +#define SYSTIMER2_ARMV8_M_DEV SYSTIMER2_ARMV8_M_DEV_S +#define SYSTIMER3_ARMV8_M_S +#define SYSTIMER3_ARMV8_M_DEV SYSTIMER3_ARMV8_M_DEV_S + +#define SYSTIMER0_ARMV8M_DEFAULT_FREQ_HZ (25000000ul) +#define SYSTIMER1_ARMV8M_DEFAULT_FREQ_HZ (25000000ul) +#define SYSTIMER2_ARMV8M_DEFAULT_FREQ_HZ (25000000ul) +#define SYSTIMER3_ARMV8M_DEFAULT_FREQ_HZ (25000000ul) + +/* CMSDK GPIO driver structures */ +#define GPIO0_CMSDK_S +#define GPIO0_CMSDK_DEV GPIO0_CMSDK_DEV_S +#define GPIO1_CMSDK_S +#define 
GPIO1_CMSDK_DEV GPIO1_CMSDK_DEV_S +#define GPIO2_CMSDK_S +#define GPIO2_CMSDK_DEV GPIO2_CMSDK_DEV_S +#define GPIO3_CMSDK_S +#define GPIO3_CMSDK_DEV GPIO3_CMSDK_DEV_S + +/* System Watchdogs */ +#define SYSWDOG_ARMV8_M_S +#define SYSWDOG_ARMV8_M_DEV SYSWDOG_ARMV8_M_DEV_S + +/* ARM MPC SIE 300 driver structures */ +#define MPC_VM0_S +#define MPC_VM0_DEV MPC_VM0_DEV_S +#define MPC_VM1_S +#define MPC_VM1_DEV MPC_VM1_DEV_S +#define MPC_SSRAM2_S +#define MPC_SSRAM2_DEV MPC_SSRAM2_DEV_S +#define MPC_SSRAM3_S +#define MPC_SSRAM3_DEV MPC_SSRAM3_DEV_S + +/* ARM PPC driver structures */ +#define PPC_SSE300_MAIN0_S +#define PPC_SSE300_MAIN0_DEV PPC_SSE300_MAIN0_DEV_S +#define PPC_SSE300_MAIN_EXP0_S +#define PPC_SSE300_MAIN_EXP0_DEV PPC_SSE300_MAIN_EXP0_DEV_S +#define PPC_SSE300_MAIN_EXP1_S +#define PPC_SSE300_MAIN_EXP1_DEV PPC_SSE300_MAIN_EXP1_DEV_S +#define PPC_SSE300_MAIN_EXP2_S +#define PPC_SSE300_MAIN_EXP2_DEV PPC_SSE300_MAIN_EXP2_DEV_S +#define PPC_SSE300_MAIN_EXP3_S +#define PPC_SSE300_MAIN_EXP3_DEV PPC_SSE300_MAIN_EXP3_DEV_S +#define PPC_SSE300_PERIPH0_S +#define PPC_SSE300_PERIPH0_DEV PPC_SSE300_PERIPH0_DEV_S +#define PPC_SSE300_PERIPH1_S +#define PPC_SSE300_PERIPH1_DEV PPC_SSE300_PERIPH1_DEV_S +#define PPC_SSE300_PERIPH_EXP0_S +#define PPC_SSE300_PERIPH_EXP0_DEV PPC_SSE300_PERIPH_EXP0_DEV_S +#define PPC_SSE300_PERIPH_EXP1_S +#define PPC_SSE300_PERIPH_EXP1_DEV PPC_SSE300_PERIPH_EXP1_DEV_S +#define PPC_SSE300_PERIPH_EXP2_S +#define PPC_SSE300_PERIPH_EXP2_DEV PPC_SSE300_PERIPH_EXP2_DEV_S +#define PPC_SSE300_PERIPH_EXP3_S +#define PPC_SSE300_PERIPH_EXP3_DEV PPC_SSE300_PERIPH_EXP3_DEV_S + +/* ARM SPI PL022 */ +/* Invalid device stubs are not defined */ +#define DEFAULT_SPI_SPEED_HZ 4000000U /* 4MHz */ +#define SPI1_PL022_S +#define SPI1_PL022_DEV SPI1_PL022_DEV_S + + +#endif /* __DEVICE_CFG_H__ */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/device_cfg.h.base@1.1.3 b/dsppp/RTE/Device/SSE-300-MPS3/device_cfg.h.base@1.1.3 new file mode 100644 index 00000000..2ff3eaa7 --- 
/dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/device_cfg.h.base@1.1.3 @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2020-2022 Arm Limited. All rights reserved. + * + * Licensed under the Apache License Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __DEVICE_CFG_H__ +#define __DEVICE_CFG_H__ + +/** + * \file device_cfg.h + * \brief Configuration file native driver re-targeting + * + * \details This file can be used to add native driver specific macro + * definitions to select which peripherals are available in the build. + * + * This is a default device configuration file with all peripherals enabled. 
+ */ + +/* Secure only peripheral configuration */ + +/* ARM MPS3 IO SCC */ +#define MPS3_IO_S +#define MPS3_IO_DEV MPS3_IO_DEV_S + +/* I2C_SBCon */ +#define I2C0_SBCON_S +#define I2C0_SBCON_DEV I2C0_SBCON_DEV_S + +/* I2S */ +#define MPS3_I2S_S +#define MPS3_I2S_DEV MPS3_I2S_DEV_S + +/* ARM UART Controller PL011 */ +#define UART0_CMSDK_S +#define UART0_CMSDK_DEV UART0_CMSDK_DEV_S +#define UART1_CMSDK_S +#define UART1_CMSDK_DEV UART1_CMSDK_DEV_S + +#define DEFAULT_UART_BAUDRATE 115200U + +/* To be used as CODE and DATA sram */ +#define MPC_ISRAM0_S +#define MPC_ISRAM0_DEV MPC_ISRAM0_DEV_S + +#define MPC_ISRAM1_S +#define MPC_ISRAM1_DEV MPC_ISRAM0_DEV_S + +#define MPC_SRAM_S +#define MPC_SRAM_DEV MPC_SRAM_DEV_S + +#define MPC_QSPI_S +#define MPC_QSPI_DEV MPC_QSPI_DEV_S + +/** System Counter Armv8-M */ +#define SYSCOUNTER_CNTRL_ARMV8_M_S +#define SYSCOUNTER_CNTRL_ARMV8_M_DEV SYSCOUNTER_CNTRL_ARMV8_M_DEV_S + +#define SYSCOUNTER_READ_ARMV8_M_S +#define SYSCOUNTER_READ_ARMV8_M_DEV SYSCOUNTER_READ_ARMV8_M_DEV_S +/** + * Arbitrary scaling values for test purposes + */ +#define SYSCOUNTER_ARMV8_M_DEFAULT_SCALE0_INT 1u +#define SYSCOUNTER_ARMV8_M_DEFAULT_SCALE0_FRACT 0u +#define SYSCOUNTER_ARMV8_M_DEFAULT_SCALE1_INT 1u +#define SYSCOUNTER_ARMV8_M_DEFAULT_SCALE1_FRACT 0u + +/* System timer */ +#define SYSTIMER0_ARMV8_M_S +#define SYSTIMER0_ARMV8_M_DEV SYSTIMER0_ARMV8_M_DEV_S +#define SYSTIMER1_ARMV8_M_S +#define SYSTIMER1_ARMV8_M_DEV SYSTIMER1_ARMV8_M_DEV_S +#define SYSTIMER2_ARMV8_M_S +#define SYSTIMER2_ARMV8_M_DEV SYSTIMER2_ARMV8_M_DEV_S +#define SYSTIMER3_ARMV8_M_S +#define SYSTIMER3_ARMV8_M_DEV SYSTIMER3_ARMV8_M_DEV_S + +#define SYSTIMER0_ARMV8M_DEFAULT_FREQ_HZ (25000000ul) +#define SYSTIMER1_ARMV8M_DEFAULT_FREQ_HZ (25000000ul) +#define SYSTIMER2_ARMV8M_DEFAULT_FREQ_HZ (25000000ul) +#define SYSTIMER3_ARMV8M_DEFAULT_FREQ_HZ (25000000ul) + +/* CMSDK GPIO driver structures */ +#define GPIO0_CMSDK_S +#define GPIO0_CMSDK_DEV GPIO0_CMSDK_DEV_S +#define GPIO1_CMSDK_S +#define 
GPIO1_CMSDK_DEV GPIO1_CMSDK_DEV_S +#define GPIO2_CMSDK_S +#define GPIO2_CMSDK_DEV GPIO2_CMSDK_DEV_S +#define GPIO3_CMSDK_S +#define GPIO3_CMSDK_DEV GPIO3_CMSDK_DEV_S + +/* System Watchdogs */ +#define SYSWDOG_ARMV8_M_S +#define SYSWDOG_ARMV8_M_DEV SYSWDOG_ARMV8_M_DEV_S + +/* ARM MPC SIE 300 driver structures */ +#define MPC_VM0_S +#define MPC_VM0_DEV MPC_VM0_DEV_S +#define MPC_VM1_S +#define MPC_VM1_DEV MPC_VM1_DEV_S +#define MPC_SSRAM2_S +#define MPC_SSRAM2_DEV MPC_SSRAM2_DEV_S +#define MPC_SSRAM3_S +#define MPC_SSRAM3_DEV MPC_SSRAM3_DEV_S + +/* ARM PPC driver structures */ +#define PPC_SSE300_MAIN0_S +#define PPC_SSE300_MAIN0_DEV PPC_SSE300_MAIN0_DEV_S +#define PPC_SSE300_MAIN_EXP0_S +#define PPC_SSE300_MAIN_EXP0_DEV PPC_SSE300_MAIN_EXP0_DEV_S +#define PPC_SSE300_MAIN_EXP1_S +#define PPC_SSE300_MAIN_EXP1_DEV PPC_SSE300_MAIN_EXP1_DEV_S +#define PPC_SSE300_MAIN_EXP2_S +#define PPC_SSE300_MAIN_EXP2_DEV PPC_SSE300_MAIN_EXP2_DEV_S +#define PPC_SSE300_MAIN_EXP3_S +#define PPC_SSE300_MAIN_EXP3_DEV PPC_SSE300_MAIN_EXP3_DEV_S +#define PPC_SSE300_PERIPH0_S +#define PPC_SSE300_PERIPH0_DEV PPC_SSE300_PERIPH0_DEV_S +#define PPC_SSE300_PERIPH1_S +#define PPC_SSE300_PERIPH1_DEV PPC_SSE300_PERIPH1_DEV_S +#define PPC_SSE300_PERIPH_EXP0_S +#define PPC_SSE300_PERIPH_EXP0_DEV PPC_SSE300_PERIPH_EXP0_DEV_S +#define PPC_SSE300_PERIPH_EXP1_S +#define PPC_SSE300_PERIPH_EXP1_DEV PPC_SSE300_PERIPH_EXP1_DEV_S +#define PPC_SSE300_PERIPH_EXP2_S +#define PPC_SSE300_PERIPH_EXP2_DEV PPC_SSE300_PERIPH_EXP2_DEV_S +#define PPC_SSE300_PERIPH_EXP3_S +#define PPC_SSE300_PERIPH_EXP3_DEV PPC_SSE300_PERIPH_EXP3_DEV_S + +/* ARM SPI PL022 */ +/* Invalid device stubs are not defined */ +#define DEFAULT_SPI_SPEED_HZ 4000000U /* 4MHz */ +#define SPI1_PL022_S +#define SPI1_PL022_DEV SPI1_PL022_DEV_S + + +#endif /* __DEVICE_CFG_H__ */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.ld b/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.ld new file mode 100644 index 00000000..5c64ad4f 
--- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.ld @@ -0,0 +1,242 @@ +;/* +; * Copyright (c) 2009-2023 Arm Limited +; * +; * Licensed under the Apache License, Version 2.0 (the "License"); +; * you may not use this file except in compliance with the License. +; * You may obtain a copy of the License at +; * +; * http://www.apache.org/licenses/LICENSE-2.0 +; * +; * Unless required by applicable law or agreed to in writing, software +; * distributed under the License is distributed on an "AS IS" BASIS, +; * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; * See the License for the specific language governing permissions and +; * limitations under the License. +; */ + +/* Linker script to configure memory regions. */ +/* This file will be run trough the pre-processor. */ + +#include "region_defs.h" + +MEMORY +{ + FLASH (rx) : ORIGIN = S_CODE_START, LENGTH = S_CODE_SIZE + RAM (rw) : ORIGIN = S_DATA_START, LENGTH = S_DATA_SIZE +} + +__heap_size__ = HEAP_SIZE; +__stack_size__ = STACK_SIZE; + +/* Library configurations */ +GROUP(libgcc.a libc.a libm.a libnosys.a) + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. 
+ * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapBase + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + * __Vectors_End + * __Vectors_Size + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + __Vectors_End = .; + __Vectors_Size = __Vectors_End - __Vectors; + __end__ = .; + + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > FLASH + + /* + * Place the CMSE Veneers (containing the SG instruction) after the code, in a + * separate 32 bytes aligned region so that the SAU can programmed to just set + * this region as Non-Secure Callable. The maximum size of this executable + * region makes it only used the space left over by the ER_CODE region + * so that you can rely on code+veneer size combined will not exceed the + * S_CODE_SIZE value. We also substract from the available space the + * area used to align this section on 32 bytes boundary (for SAU conf). + */ + .gnu.sgstubs : ALIGN(32) + { + *(.gnu.sgstubs*) + } > FLASH + . 
= ALIGN(32); + Image$$ER_CODE_CMSE_VENEER$$Base = ADDR(.gnu.sgstubs); + Image$$ER_CODE_CMSE_VENEER$$Limit = .; + Image$$ER_CODE_CMSE_VENEER$$Length = Image$$ER_CODE_CMSE_VENEER$$Limit - Image$$ER_CODE_CMSE_VENEER$$Base; + + /* Make sure veneers fit into code memory */ + ASSERT(((S_CODE_START + S_CODE_SIZE) > Image$$ER_CODE_CMSE_VENEER$$Limit), "Veneer region does not fit into code memory") + + .ARM.extab : ALIGN(32) + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > FLASH + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > FLASH + __exidx_end = .; + + /* To copy multiple ROM to RAM sections, + * define etext2/data2_start/data2_end and + * define __STARTUP_COPY_MULTIPLE in startup_cmsdk_mps2_sse_200.S */ + .copy.table : ALIGN(4) + { + __copy_table_start__ = .; + LONG (__etext) + LONG (__data_start__) + LONG ((__data_end__ - __data_start__) / 4) + LONG (DEFINED(__etext2) ? __etext2 : 0) + LONG (DEFINED(__data2_start__) ? __data2_start__ : 0) + LONG (DEFINED(__data2_start__) ? ((__data2_end__ - __data2_start__) / 4) : 0) + __copy_table_end__ = .; + } > FLASH + + /* To clear multiple BSS sections, + * uncomment .zero.table section and, + * define __STARTUP_CLEAR_BSS_MULTIPLE in startup_ARMCMx.S */ + .zero.table : ALIGN(4) + { + __zero_table_start__ = .; + LONG (__bss_start__) + LONG ((__bss_end__ - __bss_start__) / 4) + LONG (DEFINED(__bss2_start__) ? __bss2_start__ : 0) + LONG (DEFINED(__bss2_start__) ? ((__bss2_end__ - __bss2_start__) / 4) : 0) + __zero_table_end__ = .; + } > FLASH + + __etext = ALIGN(4); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + + . 
= ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM AT> FLASH + + .bss : ALIGN(4) + { + __bss_start__ = .; + *(.bss*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM + + bss_size = __bss_end__ - __bss_start__; + + .stack : ALIGN(8) + { + __StackLimit = .; + KEEP(*(.stack*)) + . += __stack_size__ - 0x8; + __StackTop = .; + } > RAM + + .msp_stack_seal_res : + { + . += 0x8; + } > RAM + __StackSeal = ADDR(.msp_stack_seal_res); + + .heap : ALIGN(8) + { + __end__ = .; + PROVIDE(end = .); + __HeapBase = .; + . += __heap_size__; + __HeapLimit = .; + __heap_limit = .; /* Add for _sbrk */ + } > RAM + + /* Set stack top to end of the used RAM section, and stack limit move down by + * size of stack_dummy section */ + PROVIDE(__stack = __StackTop); + + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackTop <= (S_DATA_START + S_DATA_SIZE), "Secure RAM region overflowed") +} diff --git a/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.ld.base@1.0.0 b/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.ld.base@1.0.0 new file mode 100644 index 00000000..ff09e8e3 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.ld.base@1.0.0 @@ -0,0 +1,242 @@ +;/* +; * Copyright (c) 2009-2023 Arm Limited +; * +; * Licensed under the Apache License, Version 2.0 (the "License"); +; * you may not use this file except in compliance with the License. +; * You may obtain a copy of the License at +; * +; * http://www.apache.org/licenses/LICENSE-2.0 +; * +; * Unless required by applicable law or agreed to in writing, software +; * distributed under the License is distributed on an "AS IS" BASIS, +; * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+; * See the License for the specific language governing permissions and +; * limitations under the License. +; */ + +/* Linker script to configure memory regions. */ +/* This file will be run trough the pre-processor. */ + +#include "region_defs.h" + +MEMORY +{ + FLASH (rx) : ORIGIN = S_CODE_START, LENGTH = S_CODE_SIZE + RAM (rwx) : ORIGIN = S_DATA_START, LENGTH = S_DATA_SIZE +} + +__heap_size__ = HEAP_SIZE; +__stack_size__ = STACK_SIZE; + +/* Library configurations */ +GROUP(libgcc.a libc.a libm.a libnosys.a) + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. + * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapBase + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + * __Vectors_End + * __Vectors_Size + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + __Vectors_End = .; + __Vectors_Size = __Vectors_End - __Vectors; + __end__ = .; + + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > FLASH + + /* + * Place the CMSE Veneers (containing the SG instruction) after the code, in a + * separate 32 bytes 
aligned region so that the SAU can programmed to just set + * this region as Non-Secure Callable. The maximum size of this executable + * region makes it only used the space left over by the ER_CODE region + * so that you can rely on code+veneer size combined will not exceed the + * S_CODE_SIZE value. We also substract from the available space the + * area used to align this section on 32 bytes boundary (for SAU conf). + */ + .gnu.sgstubs : ALIGN(32) + { + *(.gnu.sgstubs*) + } > FLASH + . = ALIGN(32); + Image$$ER_CODE_CMSE_VENEER$$Base = ADDR(.gnu.sgstubs); + Image$$ER_CODE_CMSE_VENEER$$Limit = .; + Image$$ER_CODE_CMSE_VENEER$$Length = Image$$ER_CODE_CMSE_VENEER$$Limit - Image$$ER_CODE_CMSE_VENEER$$Base; + + /* Make sure veneers fit into code memory */ + ASSERT(((S_CODE_START + S_CODE_SIZE) > Image$$ER_CODE_CMSE_VENEER$$Limit), "Veneer region does not fit into code memory") + + .ARM.extab : ALIGN(32) + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > FLASH + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > FLASH + __exidx_end = .; + + /* To copy multiple ROM to RAM sections, + * define etext2/data2_start/data2_end and + * define __STARTUP_COPY_MULTIPLE in startup_cmsdk_mps2_sse_200.S */ + .copy.table : ALIGN(4) + { + __copy_table_start__ = .; + LONG (__etext) + LONG (__data_start__) + LONG ((__data_end__ - __data_start__) / 4) + LONG (DEFINED(__etext2) ? __etext2 : 0) + LONG (DEFINED(__data2_start__) ? __data2_start__ : 0) + LONG (DEFINED(__data2_start__) ? ((__data2_end__ - __data2_start__) / 4) : 0) + __copy_table_end__ = .; + } > FLASH + + /* To clear multiple BSS sections, + * uncomment .zero.table section and, + * define __STARTUP_CLEAR_BSS_MULTIPLE in startup_ARMCMx.S */ + .zero.table : ALIGN(4) + { + __zero_table_start__ = .; + LONG (__bss_start__) + LONG ((__bss_end__ - __bss_start__) / 4) + LONG (DEFINED(__bss2_start__) ? __bss2_start__ : 0) + LONG (DEFINED(__bss2_start__) ? 
((__bss2_end__ - __bss2_start__) / 4) : 0) + __zero_table_end__ = .; + } > FLASH + + __etext = ALIGN(4); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM AT> FLASH + + .bss : ALIGN(4) + { + __bss_start__ = .; + *(.bss*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM + + bss_size = __bss_end__ - __bss_start__; + + .stack : ALIGN(8) + { + __StackLimit = .; + KEEP(*(.stack*)) + . += __stack_size__ - 0x8; + __StackTop = .; + } > RAM + + .msp_stack_seal_res : + { + . += 0x8; + } > RAM + __StackSeal = ADDR(.msp_stack_seal_res); + + .heap : ALIGN(8) + { + __end__ = .; + PROVIDE(end = .); + __HeapBase = .; + . += __heap_size__; + __HeapLimit = .; + __heap_limit = .; /* Add for _sbrk */ + } > RAM + + /* Set stack top to end of the used RAM section, and stack limit move down by + * size of stack_dummy section */ + PROVIDE(__stack = __StackTop); + + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackTop <= (S_DATA_START + S_DATA_SIZE), "Secure RAM region overflowed") +} diff --git a/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.sct b/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.sct new file mode 100644 index 00000000..8b95c189 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.sct @@ -0,0 +1,62 @@ +#! 
armclang --target=arm-arm-none-eabi -march=armv8.1-m.main -E -xc + +;/* +; * Copyright (c) 2018-2023 Arm Limited +; * +; * Licensed under the Apache License, Version 2.0 (the "License"); +; * you may not use this file except in compliance with the License. +; * You may obtain a copy of the License at +; * +; * http://www.apache.org/licenses/LICENSE-2.0 +; * +; * Unless required by applicable law or agreed to in writing, software +; * distributed under the License is distributed on an "AS IS" BASIS, +; * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; * See the License for the specific language governing permissions and +; * limitations under the License. +; * +; */ + +#include "region_defs.h" + +LR_CODE S_CODE_START { + ER_CODE S_CODE_START { + *.o (RESET +First) + .ANY (+RO) + } + + /* This empty, zero long execution region is here to mark the limit address + * of the last execution region that is allocated in SRAM. + */ + CODE_WATERMARK +0 EMPTY 0x0 { + } + /* Make sure that the sections allocated in the SRAM does not exceed the + * size of the SRAM available. + */ + ScatterAssert(ImageLimit(CODE_WATERMARK) <= S_CODE_START + S_CODE_SIZE) + + ER_DATA S_DATA_START { + .ANY (+ZI +RW) + } + + #if HEAP_SIZE > 0 + ARM_LIB_HEAP +0 ALIGN 8 EMPTY HEAP_SIZE { ; Reserve empty region for heap + } + #endif + + ARM_LIB_STACK +0 ALIGN 32 EMPTY STACK_SIZE - 0x8 { ; Reserve empty region for stack + } + + STACKSEAL +0 EMPTY 0x8 { + } + + /* This empty, zero long execution region is here to mark the limit address + * of the last execution region that is allocated in SRAM. + */ + SRAM_WATERMARK +0 EMPTY 0x0 { + } + /* Make sure that the sections allocated in the SRAM does not exceed the + * size of the SRAM available. 
+ */ + ScatterAssert(ImageLimit(SRAM_WATERMARK) <= S_DATA_START + S_DATA_SIZE) +} diff --git a/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.sct.base@1.1.0 b/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.sct.base@1.1.0 new file mode 100644 index 00000000..8b95c189 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/linker_SSE300MPS3_secure.sct.base@1.1.0 @@ -0,0 +1,62 @@ +#! armclang --target=arm-arm-none-eabi -march=armv8.1-m.main -E -xc + +;/* +; * Copyright (c) 2018-2023 Arm Limited +; * +; * Licensed under the Apache License, Version 2.0 (the "License"); +; * you may not use this file except in compliance with the License. +; * You may obtain a copy of the License at +; * +; * http://www.apache.org/licenses/LICENSE-2.0 +; * +; * Unless required by applicable law or agreed to in writing, software +; * distributed under the License is distributed on an "AS IS" BASIS, +; * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; * See the License for the specific language governing permissions and +; * limitations under the License. +; * +; */ + +#include "region_defs.h" + +LR_CODE S_CODE_START { + ER_CODE S_CODE_START { + *.o (RESET +First) + .ANY (+RO) + } + + /* This empty, zero long execution region is here to mark the limit address + * of the last execution region that is allocated in SRAM. + */ + CODE_WATERMARK +0 EMPTY 0x0 { + } + /* Make sure that the sections allocated in the SRAM does not exceed the + * size of the SRAM available. 
+ */ + ScatterAssert(ImageLimit(CODE_WATERMARK) <= S_CODE_START + S_CODE_SIZE) + + ER_DATA S_DATA_START { + .ANY (+ZI +RW) + } + + #if HEAP_SIZE > 0 + ARM_LIB_HEAP +0 ALIGN 8 EMPTY HEAP_SIZE { ; Reserve empty region for heap + } + #endif + + ARM_LIB_STACK +0 ALIGN 32 EMPTY STACK_SIZE - 0x8 { ; Reserve empty region for stack + } + + STACKSEAL +0 EMPTY 0x8 { + } + + /* This empty, zero long execution region is here to mark the limit address + * of the last execution region that is allocated in SRAM. + */ + SRAM_WATERMARK +0 EMPTY 0x0 { + } + /* Make sure that the sections allocated in the SRAM does not exceed the + * size of the SRAM available. + */ + ScatterAssert(ImageLimit(SRAM_WATERMARK) <= S_DATA_START + S_DATA_SIZE) +} diff --git a/dsppp/RTE/Device/SSE-300-MPS3/region_defs.h b/dsppp/RTE/Device/SSE-300-MPS3/region_defs.h new file mode 100644 index 00000000..32ac16b3 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/region_defs.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016-2022 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __REGION_DEFS_H__ +#define __REGION_DEFS_H__ + +#include "region_limits.h" + +/* ************************************************************** + * WARNING: this file is parsed both by the C/C++ compiler + * and the linker. As a result the syntax must be valid not only + * for C/C++ but for the linker scripts too. 
+ * Beware of the following limitations: + * - LD (GCC linker) requires white space around operators. + * - UL postfix for macros is not suported by the linker script + ****************************************************************/ + +/* Secure regions */ +#define S_CODE_START ( S_ROM_ALIAS ) +#define S_CODE_SIZE ( TOTAL_S_ROM_SIZE ) +#define S_CODE_LIMIT ( S_CODE_START + S_CODE_SIZE ) + +#define S_DATA_START ( S_RAM_ALIAS ) +#define S_DATA_SIZE ( TOTAL_S_RAM_SIZE ) +#define S_DATA_LIMIT ( S_DATA_START + S_DATA_SIZE ) + +#define S_DDR4_START ( S_DDR4_ALIAS ) +#define S_DDR4_SIZE ( TOTAL_S_DDR4_SIZE ) +#define S_DDR4_LIMIT ( S_DDR4_START + S_DDR4_SIZE ) + +#endif /* __REGION_DEFS_H__ */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/region_defs.h.base@1.0.0 b/dsppp/RTE/Device/SSE-300-MPS3/region_defs.h.base@1.0.0 new file mode 100644 index 00000000..32ac16b3 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/region_defs.h.base@1.0.0 @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016-2022 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __REGION_DEFS_H__ +#define __REGION_DEFS_H__ + +#include "region_limits.h" + +/* ************************************************************** + * WARNING: this file is parsed both by the C/C++ compiler + * and the linker. As a result the syntax must be valid not only + * for C/C++ but for the linker scripts too. 
+ * Beware of the following limitations: + * - LD (GCC linker) requires white space around operators. + * - UL postfix for macros is not suported by the linker script + ****************************************************************/ + +/* Secure regions */ +#define S_CODE_START ( S_ROM_ALIAS ) +#define S_CODE_SIZE ( TOTAL_S_ROM_SIZE ) +#define S_CODE_LIMIT ( S_CODE_START + S_CODE_SIZE ) + +#define S_DATA_START ( S_RAM_ALIAS ) +#define S_DATA_SIZE ( TOTAL_S_RAM_SIZE ) +#define S_DATA_LIMIT ( S_DATA_START + S_DATA_SIZE ) + +#define S_DDR4_START ( S_DDR4_ALIAS ) +#define S_DDR4_SIZE ( TOTAL_S_DDR4_SIZE ) +#define S_DDR4_LIMIT ( S_DDR4_START + S_DDR4_SIZE ) + +#endif /* __REGION_DEFS_H__ */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/region_limits.h b/dsppp/RTE/Device/SSE-300-MPS3/region_limits.h new file mode 100644 index 00000000..0d600a36 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/region_limits.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018-2022 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __REGION_LIMITS_H__ +#define __REGION_LIMITS_H__ + +/* ************************************************************** + * WARNING: this file is parsed both by the C/C++ compiler + * and the linker. As a result the syntax must be valid not only + * for C/C++ but for the linker scripts too. + * Beware of the following limitations: + * - LD (GCC linker) requires white space around operators. 
+ * - UL postfix for macros is not suported by the linker script + ****************************************************************/ + +/* Secure Code */ +#define S_ROM_ALIAS (0x10000000) /* ITCM_BASE_S */ +#define TOTAL_S_ROM_SIZE (0x00080000) /* 512 kB */ + +/* Secure Data */ +#define S_RAM_ALIAS (0x30000000) /* DTCM_BASE_S */ +#define TOTAL_S_RAM_SIZE (0x00080000) /* 512 kB */ + +/* Secure DDR4 */ +#define S_DDR4_ALIAS (0x70000000) /* DDR4_BLK1_BASE_S */ +#define TOTAL_S_DDR4_SIZE (0x10000000) /* 256 MB */ + +/* Heap and Stack sizes for secure and nonsecure applications */ +#define HEAP_SIZE (0x00038000) /* 1 KiB */ +#define STACK_SIZE (0x00002000) /* 1 KiB */ + +#endif /* __REGION_LIMITS_H__ */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/region_limits.h.base@1.0.0 b/dsppp/RTE/Device/SSE-300-MPS3/region_limits.h.base@1.0.0 new file mode 100644 index 00000000..e7897866 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/region_limits.h.base@1.0.0 @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018-2022 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __REGION_LIMITS_H__ +#define __REGION_LIMITS_H__ + +/* ************************************************************** + * WARNING: this file is parsed both by the C/C++ compiler + * and the linker. As a result the syntax must be valid not only + * for C/C++ but for the linker scripts too. + * Beware of the following limitations: + * - LD (GCC linker) requires white space around operators. 
+ * - UL postfix for macros is not suported by the linker script + ****************************************************************/ + +/* Secure Code */ +#define S_ROM_ALIAS (0x10000000) /* ITCM_BASE_S */ +#define TOTAL_S_ROM_SIZE (0x00080000) /* 512 kB */ + +/* Secure Data */ +#define S_RAM_ALIAS (0x30000000) /* DTCM_BASE_S */ +#define TOTAL_S_RAM_SIZE (0x00080000) /* 512 kB */ + +/* Secure DDR4 */ +#define S_DDR4_ALIAS (0x70000000) /* DDR4_BLK1_BASE_S */ +#define TOTAL_S_DDR4_SIZE (0x10000000) /* 256 MB */ + +/* Heap and Stack sizes for secure and nonsecure applications */ +#define HEAP_SIZE (0x00000400) /* 1 KiB */ +#define STACK_SIZE (0x00000400) /* 1 KiB */ + +#endif /* __REGION_LIMITS_H__ */ diff --git a/dsppp/RTE/Device/SSE-300-MPS3/startup_SSE300MPS3.c b/dsppp/RTE/Device/SSE-300-MPS3/startup_SSE300MPS3.c new file mode 100644 index 00000000..72b39ca5 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/startup_SSE300MPS3.c @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2022-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * This file is derivative of CMSIS V5.9.0 startup_ARMCM55.c + * Git SHA: 2b7495b8535bdcb306dac29b9ded4cfb679d7e5c + */ + +#include "SSE300MPS3.h" + +/*---------------------------------------------------------------------------- + External References + *----------------------------------------------------------------------------*/ +extern uint32_t __INITIAL_SP; +extern uint32_t __STACK_LIMIT; +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +extern uint64_t __STACK_SEAL; +#endif + +extern void __PROGRAM_START(void) __NO_RETURN; + +/*---------------------------------------------------------------------------- + Internal References + *----------------------------------------------------------------------------*/ +void Reset_Handler (void) __NO_RETURN; + +/*---------------------------------------------------------------------------- + Exception / Interrupt Handler + *----------------------------------------------------------------------------*/ +#define DEFAULT_IRQ_HANDLER(handler_name) \ +void __WEAK handler_name(void) __NO_RETURN; \ +void handler_name(void) { \ + while(1); \ +} + +/* Exceptions */ +DEFAULT_IRQ_HANDLER(NMI_Handler) +DEFAULT_IRQ_HANDLER(HardFault_Handler) +DEFAULT_IRQ_HANDLER(MemManage_Handler) +DEFAULT_IRQ_HANDLER(BusFault_Handler) +DEFAULT_IRQ_HANDLER(UsageFault_Handler) +DEFAULT_IRQ_HANDLER(SecureFault_Handler) +DEFAULT_IRQ_HANDLER(SVC_Handler) +DEFAULT_IRQ_HANDLER(DebugMon_Handler) +DEFAULT_IRQ_HANDLER(PendSV_Handler) +DEFAULT_IRQ_HANDLER(SysTick_Handler) + +DEFAULT_IRQ_HANDLER(NONSEC_WATCHDOG_RESET_REQ_Handler) +DEFAULT_IRQ_HANDLER(NONSEC_WATCHDOG_Handler) +DEFAULT_IRQ_HANDLER(SLOWCLK_Timer_Handler) +DEFAULT_IRQ_HANDLER(TFM_TIMER0_IRQ_Handler) +DEFAULT_IRQ_HANDLER(TIMER1_Handler) +DEFAULT_IRQ_HANDLER(TIMER2_Handler) +DEFAULT_IRQ_HANDLER(MPC_Handler) +DEFAULT_IRQ_HANDLER(PPC_Handler) +DEFAULT_IRQ_HANDLER(MSC_Handler) +DEFAULT_IRQ_HANDLER(BRIDGE_ERROR_Handler) +DEFAULT_IRQ_HANDLER(MGMT_PPU_Handler) 
+DEFAULT_IRQ_HANDLER(SYS_PPU_Handler) +DEFAULT_IRQ_HANDLER(CPU0_PPU_Handler) +DEFAULT_IRQ_HANDLER(DEBUG_PPU_Handler) +DEFAULT_IRQ_HANDLER(TIMER3_AON_Handler) +DEFAULT_IRQ_HANDLER(CPU0_CTI_0_Handler) +DEFAULT_IRQ_HANDLER(CPU0_CTI_1_Handler) + +DEFAULT_IRQ_HANDLER(System_Timestamp_Counter_Handler) +DEFAULT_IRQ_HANDLER(UARTRX0_Handler) +DEFAULT_IRQ_HANDLER(UARTTX0_Handler) +DEFAULT_IRQ_HANDLER(UARTRX1_Handler) +DEFAULT_IRQ_HANDLER(UARTTX1_Handler) +DEFAULT_IRQ_HANDLER(UARTRX2_Handler) +DEFAULT_IRQ_HANDLER(UARTTX2_Handler) +DEFAULT_IRQ_HANDLER(UARTRX3_Handler) +DEFAULT_IRQ_HANDLER(UARTTX3_Handler) +DEFAULT_IRQ_HANDLER(UARTRX4_Handler) +DEFAULT_IRQ_HANDLER(UARTTX4_Handler) +DEFAULT_IRQ_HANDLER(UART0_Combined_Handler) +DEFAULT_IRQ_HANDLER(UART1_Combined_Handler) +DEFAULT_IRQ_HANDLER(UART2_Combined_Handler) +DEFAULT_IRQ_HANDLER(UART3_Combined_Handler) +DEFAULT_IRQ_HANDLER(UART4_Combined_Handler) +DEFAULT_IRQ_HANDLER(UARTOVF_Handler) +DEFAULT_IRQ_HANDLER(ETHERNET_Handler) +DEFAULT_IRQ_HANDLER(I2S_Handler) +DEFAULT_IRQ_HANDLER(TOUCH_SCREEN_Handler) +DEFAULT_IRQ_HANDLER(USB_Handler) +DEFAULT_IRQ_HANDLER(SPI_ADC_Handler) +DEFAULT_IRQ_HANDLER(SPI_SHIELD0_Handler) +DEFAULT_IRQ_HANDLER(SPI_SHIELD1_Handler) +DEFAULT_IRQ_HANDLER(ETHOS_U55_Handler) +#ifdef CORSTONE300_AN547 +DEFAULT_IRQ_HANDLER(DMA_Ch_1_Error_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_1_Terminal_Count_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_1_Combined_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_2_Error_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_2_Terminal_Count_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_2_Combined_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_3_Error_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_3_Terminal_Count_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_3_Combined_Handler) +#endif +DEFAULT_IRQ_HANDLER(GPIO0_Combined_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_Combined_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_Combined_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_Combined_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_0_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_1_Handler) 
+DEFAULT_IRQ_HANDLER(GPIO0_2_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_3_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_4_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_5_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_6_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_7_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_8_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_9_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_10_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_11_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_12_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_13_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_14_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_15_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_0_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_1_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_2_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_3_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_4_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_5_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_6_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_7_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_8_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_9_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_10_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_11_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_12_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_13_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_14_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_15_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_0_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_1_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_2_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_3_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_4_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_5_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_6_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_7_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_8_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_9_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_10_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_11_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_12_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_13_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_14_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_15_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_0_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_1_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_2_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_3_Handler) +DEFAULT_IRQ_HANDLER(UARTRX5_Handler) +DEFAULT_IRQ_HANDLER(UARTTX5_Handler) 
+DEFAULT_IRQ_HANDLER(UART5_Handler) + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#endif + +extern const VECTOR_TABLE_Type __VECTOR_TABLE[]; + const VECTOR_TABLE_Type __VECTOR_TABLE[] __VECTOR_TABLE_ATTRIBUTE = { + (VECTOR_TABLE_Type)(&__INITIAL_SP), /* Initial Stack Pointer */ + Reset_Handler, /* Reset Handler */ + NMI_Handler, /* -14: NMI Handler */ + HardFault_Handler, /* -13: Hard Fault Handler */ + MemManage_Handler, /* -12: MPU Fault Handler */ + BusFault_Handler, /* -11: Bus Fault Handler */ + UsageFault_Handler, /* -10: Usage Fault Handler */ + SecureFault_Handler, /* -9: Secure Fault Handler */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + SVC_Handler, /* -5: SVCall Handler */ + DebugMon_Handler, /* -4: Debug Monitor Handler */ + 0, /* Reserved */ + PendSV_Handler, /* -2: PendSV Handler */ + SysTick_Handler, /* -1: SysTick Handler */ + + NONSEC_WATCHDOG_RESET_REQ_Handler, /* 0: Non-Secure Watchdog Reset Request Handler */ + NONSEC_WATCHDOG_Handler, /* 1: Non-Secure Watchdog Handler */ + SLOWCLK_Timer_Handler, /* 2: SLOWCLK Timer Handler */ + TFM_TIMER0_IRQ_Handler, /* 3: TIMER 0 Handler */ + TIMER1_Handler, /* 4: TIMER 1 Handler */ + TIMER2_Handler, /* 5: TIMER 2 Handler */ + 0, /* 6: Reserved */ + 0, /* 7: Reserved */ + 0, /* 8: Reserved */ + MPC_Handler, /* 9: MPC Combined (Secure) Handler */ + PPC_Handler, /* 10: PPC Combined (Secure) Handler */ + MSC_Handler, /* 11: MSC Combined (Secure) Handler */ + BRIDGE_ERROR_Handler, /* 12: Bridge Error (Secure) Handler */ + 0, /* 13: Reserved */ + MGMT_PPU_Handler, /* 14: MGMT PPU Handler */ + SYS_PPU_Handler, /* 15: SYS PPU Handler */ + CPU0_PPU_Handler, /* 16: CPU0 PPU Handler */ + 0, /* 17: Reserved */ + 0, /* 18: Reserved */ + 0, /* 19: 
Reserved */ + 0, /* 20: Reserved */ + 0, /* 21: Reserved */ + 0, /* 22: Reserved */ + 0, /* 23: Reserved */ + 0, /* 24: Reserved */ + 0, /* 25: Reserved */ + DEBUG_PPU_Handler, /* 26: DEBUG PPU Handler */ + TIMER3_AON_Handler, /* 27: TIMER 3 AON Handler */ + CPU0_CTI_0_Handler, /* 28: CPU0 CTI IRQ 0 Handler */ + CPU0_CTI_1_Handler, /* 29: CPU0 CTI IRQ 1 Handler */ + 0, /* 30: Reserved */ + 0, /* 31: Reserved */ + + /* External interrupts */ + System_Timestamp_Counter_Handler, /* 32: System timestamp counter Handler */ + UARTRX0_Handler, /* 33: UART 0 RX Handler */ + UARTTX0_Handler, /* 34: UART 0 TX Handler */ + UARTRX1_Handler, /* 35: UART 1 RX Handler */ + UARTTX1_Handler, /* 36: UART 1 TX Handler */ + UARTRX2_Handler, /* 37: UART 2 RX Handler */ + UARTTX2_Handler, /* 38: UART 2 TX Handler */ + UARTRX3_Handler, /* 39: UART 3 RX Handler */ + UARTTX3_Handler, /* 40: UART 3 TX Handler */ + UARTRX4_Handler, /* 41: UART 4 RX Handler */ + UARTTX4_Handler, /* 42: UART 4 TX Handler */ + UART0_Combined_Handler, /* 43: UART 0 Combined Handler */ + UART1_Combined_Handler, /* 44: UART 1 Combined Handler */ + UART2_Combined_Handler, /* 45: UART 2 Combined Handler */ + UART3_Combined_Handler, /* 46: UART 3 Combined Handler */ + UART4_Combined_Handler, /* 47: UART 4 Combined Handler */ + UARTOVF_Handler, /* 48: UART 0, 1, 2, 3, 4 & 5 Overflow Handler */ + ETHERNET_Handler, /* 49: Ethernet Handler */ + I2S_Handler, /* 50: Audio I2S Handler */ + TOUCH_SCREEN_Handler, /* 51: Touch Screen Handler */ + USB_Handler, /* 52: USB Handler */ + SPI_ADC_Handler, /* 53: SPI ADC Handler */ + SPI_SHIELD0_Handler, /* 54: SPI (Shield 0) Handler */ + SPI_SHIELD1_Handler, /* 55: SPI (Shield 0) Handler */ + ETHOS_U55_Handler, /* 56: Ethos-U55 Handler */ +#ifdef CORSTONE300_AN547 + 0, /* 57: Reserved */ + 0, /* 58: Reserved */ + 0, /* 59: Reserved */ + DMA_Ch_1_Error_Handler, /* 60: DMA Ch1 Error Handler */ + DMA_Ch_1_Terminal_Count_Handler, /* 61: DMA Ch1 Terminal Count Handler */ + 
DMA_Ch_1_Combined_Handler, /* 62: DMA Ch1 Combined Handler */ + DMA_Ch_2_Error_Handler, /* 63: DMA Ch2 Error Handler */ + DMA_Ch_2_Terminal_Count_Handler, /* 64: DMA Ch2 Terminal Count Handler */ + DMA_Ch_2_Combined_Handler, /* 65: DMA Ch2 Combined Handler */ + DMA_Ch_3_Error_Handler, /* 66: DMA Ch3 Error Handler */ + DMA_Ch_3_Terminal_Count_Handler, /* 67: DMA Ch3 Terminal Count Handler */ + DMA_Ch_3_Combined_Handler, /* 68: DMA Ch3 Combined Handler */ +#else + 0, /* 57: Reserved */ + 0, /* 58: Reserved */ + 0, /* 59: Reserved */ + 0, /* 60: Reserved */ + 0, /* 61: Reserved */ + 0, /* 62: Reserved */ + 0, /* 63: Reserved */ + 0, /* 64: Reserved */ + 0, /* 65: Reserved */ + 0, /* 66: Reserved */ + 0, /* 67: Reserved */ + 0, /* 68: Reserved */ +#endif + GPIO0_Combined_Handler, /* 69: GPIO 0 Combined Handler */ + GPIO1_Combined_Handler, /* 70: GPIO 1 Combined Handler */ + GPIO2_Combined_Handler, /* 71: GPIO 2 Combined Handler */ + GPIO3_Combined_Handler, /* 72: GPIO 3 Combined Handler */ + GPIO0_0_Handler, /* 73: GPIO0 Pin 0 Handler */ + GPIO0_1_Handler, /* 74: GPIO0 Pin 1 Handler */ + GPIO0_2_Handler, /* 75: GPIO0 Pin 2 Handler */ + GPIO0_3_Handler, /* 76: GPIO0 Pin 3 Handler */ + GPIO0_4_Handler, /* 77: GPIO0 Pin 4 Handler */ + GPIO0_5_Handler, /* 78: GPIO0 Pin 5 Handler */ + GPIO0_6_Handler, /* 79: GPIO0 Pin 6 Handler */ + GPIO0_7_Handler, /* 80: GPIO0 Pin 7 Handler */ + GPIO0_8_Handler, /* 81: GPIO0 Pin 8 Handler */ + GPIO0_9_Handler, /* 82: GPIO0 Pin 9 Handler */ + GPIO0_10_Handler, /* 83: GPIO0 Pin 10 Handler */ + GPIO0_11_Handler, /* 84: GPIO0 Pin 11 Handler */ + GPIO0_12_Handler, /* 85: GPIO0 Pin 12 Handler */ + GPIO0_13_Handler, /* 86: GPIO0 Pin 13 Handler */ + GPIO0_14_Handler, /* 87: GPIO0 Pin 14 Handler */ + GPIO0_15_Handler, /* 88: GPIO0 Pin 15 Handler */ + GPIO1_0_Handler, /* 89: GPIO1 Pin 0 Handler */ + GPIO1_1_Handler, /* 90: GPIO1 Pin 1 Handler */ + GPIO1_2_Handler, /* 91: GPIO1 Pin 2 Handler */ + GPIO1_3_Handler, /* 92: GPIO1 Pin 3 Handler */ + 
GPIO1_4_Handler, /* 93: GPIO1 Pin 4 Handler */ + GPIO1_5_Handler, /* 94: GPIO1 Pin 5 Handler */ + GPIO1_6_Handler, /* 95: GPIO1 Pin 6 Handler */ + GPIO1_7_Handler, /* 96: GPIO1 Pin 7 Handler */ + GPIO1_8_Handler, /* 97: GPIO1 Pin 8 Handler */ + GPIO1_9_Handler, /* 98: GPIO1 Pin 9 Handler */ + GPIO1_10_Handler, /* 99: GPIO1 Pin 10 Handler */ + GPIO1_11_Handler, /* 100: GPIO1 Pin 11 Handler */ + GPIO1_12_Handler, /* 101: GPIO1 Pin 12 Handler */ + GPIO1_13_Handler, /* 102: GPIO1 Pin 13 Handler */ + GPIO1_14_Handler, /* 103: GPIO1 Pin 14 Handler */ + GPIO1_15_Handler, /* 104: GPIO1 Pin 15 Handler */ + GPIO2_0_Handler, /* 105: GPIO2 Pin 0 Handler */ + GPIO2_1_Handler, /* 106: GPIO2 Pin 1 Handler */ + GPIO2_2_Handler, /* 107: GPIO2 Pin 2 Handler */ + GPIO2_3_Handler, /* 108: GPIO2 Pin 3 Handler */ + GPIO2_4_Handler, /* 109: GPIO2 Pin 4 Handler */ + GPIO2_5_Handler, /* 110: GPIO2 Pin 5 Handler */ + GPIO2_6_Handler, /* 111: GPIO2 Pin 6 Handler */ + GPIO2_7_Handler, /* 112: GPIO2 Pin 7 Handler */ + GPIO2_8_Handler, /* 113: GPIO2 Pin 8 Handler */ + GPIO2_9_Handler, /* 114: GPIO2 Pin 9 Handler */ + GPIO2_10_Handler, /* 115: GPIO2 Pin 10 Handler */ + GPIO2_11_Handler, /* 116: GPIO2 Pin 11 Handler */ + GPIO2_12_Handler, /* 117: GPIO2 Pin 12 Handler */ + GPIO2_13_Handler, /* 118: GPIO2 Pin 13 Handler */ + GPIO2_14_Handler, /* 119: GPIO2 Pin 14 Handler */ + GPIO2_15_Handler, /* 120: GPIO2 Pin 15 Handler */ + GPIO3_0_Handler, /* 121: GPIO3 Pin 0 Handler */ + GPIO3_1_Handler, /* 122: GPIO3 Pin 1 Handler */ + GPIO3_2_Handler, /* 123: GPIO3 Pin 2 Handler */ + GPIO3_3_Handler, /* 124: GPIO3 Pin 3 Handler */ + UARTRX5_Handler, /* 125: UART 5 RX Interrupt */ + UARTTX5_Handler, /* 126: UART 5 TX Interrupt */ + UART5_Handler, /* 127: UART 5 combined Interrupt */ + 0, /* 128: Reserved */ + 0, /* 129: Reserved */ + 0, /* 130: Reserved */ +}; + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic pop +#endif + +/*---------------------------------------------------------------------------- + 
Reset Handler called on controller reset + *----------------------------------------------------------------------------*/ +void Reset_Handler(void) +{ + __set_PSP((uint32_t)(&__INITIAL_SP)); + + __set_MSPLIM((uint32_t)(&__STACK_LIMIT)); + __set_PSPLIM((uint32_t)(&__STACK_LIMIT)); + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + __TZ_set_STACKSEAL_S((uint32_t *)(&__STACK_SEAL)); +#endif + + SystemInit(); /* CMSIS System Initialization */ + __PROGRAM_START(); /* Enter PreMain (C library entry point) */ +} diff --git a/dsppp/RTE/Device/SSE-300-MPS3/startup_SSE300MPS3.c.base@1.1.1 b/dsppp/RTE/Device/SSE-300-MPS3/startup_SSE300MPS3.c.base@1.1.1 new file mode 100644 index 00000000..72b39ca5 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/startup_SSE300MPS3.c.base@1.1.1 @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2022-2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * This file is derivative of CMSIS V5.9.0 startup_ARMCM55.c + * Git SHA: 2b7495b8535bdcb306dac29b9ded4cfb679d7e5c + */ + +#include "SSE300MPS3.h" + +/*---------------------------------------------------------------------------- + External References + *----------------------------------------------------------------------------*/ +extern uint32_t __INITIAL_SP; +extern uint32_t __STACK_LIMIT; +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +extern uint64_t __STACK_SEAL; +#endif + +extern void __PROGRAM_START(void) __NO_RETURN; + +/*---------------------------------------------------------------------------- + Internal References + *----------------------------------------------------------------------------*/ +void Reset_Handler (void) __NO_RETURN; + +/*---------------------------------------------------------------------------- + Exception / Interrupt Handler + *----------------------------------------------------------------------------*/ +#define DEFAULT_IRQ_HANDLER(handler_name) \ +void __WEAK handler_name(void) __NO_RETURN; \ +void handler_name(void) { \ + while(1); \ +} + +/* Exceptions */ +DEFAULT_IRQ_HANDLER(NMI_Handler) +DEFAULT_IRQ_HANDLER(HardFault_Handler) +DEFAULT_IRQ_HANDLER(MemManage_Handler) +DEFAULT_IRQ_HANDLER(BusFault_Handler) +DEFAULT_IRQ_HANDLER(UsageFault_Handler) +DEFAULT_IRQ_HANDLER(SecureFault_Handler) +DEFAULT_IRQ_HANDLER(SVC_Handler) +DEFAULT_IRQ_HANDLER(DebugMon_Handler) +DEFAULT_IRQ_HANDLER(PendSV_Handler) +DEFAULT_IRQ_HANDLER(SysTick_Handler) + +DEFAULT_IRQ_HANDLER(NONSEC_WATCHDOG_RESET_REQ_Handler) +DEFAULT_IRQ_HANDLER(NONSEC_WATCHDOG_Handler) +DEFAULT_IRQ_HANDLER(SLOWCLK_Timer_Handler) +DEFAULT_IRQ_HANDLER(TFM_TIMER0_IRQ_Handler) +DEFAULT_IRQ_HANDLER(TIMER1_Handler) +DEFAULT_IRQ_HANDLER(TIMER2_Handler) +DEFAULT_IRQ_HANDLER(MPC_Handler) +DEFAULT_IRQ_HANDLER(PPC_Handler) +DEFAULT_IRQ_HANDLER(MSC_Handler) +DEFAULT_IRQ_HANDLER(BRIDGE_ERROR_Handler) +DEFAULT_IRQ_HANDLER(MGMT_PPU_Handler) 
+DEFAULT_IRQ_HANDLER(SYS_PPU_Handler) +DEFAULT_IRQ_HANDLER(CPU0_PPU_Handler) +DEFAULT_IRQ_HANDLER(DEBUG_PPU_Handler) +DEFAULT_IRQ_HANDLER(TIMER3_AON_Handler) +DEFAULT_IRQ_HANDLER(CPU0_CTI_0_Handler) +DEFAULT_IRQ_HANDLER(CPU0_CTI_1_Handler) + +DEFAULT_IRQ_HANDLER(System_Timestamp_Counter_Handler) +DEFAULT_IRQ_HANDLER(UARTRX0_Handler) +DEFAULT_IRQ_HANDLER(UARTTX0_Handler) +DEFAULT_IRQ_HANDLER(UARTRX1_Handler) +DEFAULT_IRQ_HANDLER(UARTTX1_Handler) +DEFAULT_IRQ_HANDLER(UARTRX2_Handler) +DEFAULT_IRQ_HANDLER(UARTTX2_Handler) +DEFAULT_IRQ_HANDLER(UARTRX3_Handler) +DEFAULT_IRQ_HANDLER(UARTTX3_Handler) +DEFAULT_IRQ_HANDLER(UARTRX4_Handler) +DEFAULT_IRQ_HANDLER(UARTTX4_Handler) +DEFAULT_IRQ_HANDLER(UART0_Combined_Handler) +DEFAULT_IRQ_HANDLER(UART1_Combined_Handler) +DEFAULT_IRQ_HANDLER(UART2_Combined_Handler) +DEFAULT_IRQ_HANDLER(UART3_Combined_Handler) +DEFAULT_IRQ_HANDLER(UART4_Combined_Handler) +DEFAULT_IRQ_HANDLER(UARTOVF_Handler) +DEFAULT_IRQ_HANDLER(ETHERNET_Handler) +DEFAULT_IRQ_HANDLER(I2S_Handler) +DEFAULT_IRQ_HANDLER(TOUCH_SCREEN_Handler) +DEFAULT_IRQ_HANDLER(USB_Handler) +DEFAULT_IRQ_HANDLER(SPI_ADC_Handler) +DEFAULT_IRQ_HANDLER(SPI_SHIELD0_Handler) +DEFAULT_IRQ_HANDLER(SPI_SHIELD1_Handler) +DEFAULT_IRQ_HANDLER(ETHOS_U55_Handler) +#ifdef CORSTONE300_AN547 +DEFAULT_IRQ_HANDLER(DMA_Ch_1_Error_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_1_Terminal_Count_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_1_Combined_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_2_Error_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_2_Terminal_Count_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_2_Combined_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_3_Error_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_3_Terminal_Count_Handler) +DEFAULT_IRQ_HANDLER(DMA_Ch_3_Combined_Handler) +#endif +DEFAULT_IRQ_HANDLER(GPIO0_Combined_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_Combined_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_Combined_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_Combined_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_0_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_1_Handler) 
+DEFAULT_IRQ_HANDLER(GPIO0_2_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_3_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_4_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_5_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_6_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_7_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_8_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_9_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_10_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_11_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_12_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_13_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_14_Handler) +DEFAULT_IRQ_HANDLER(GPIO0_15_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_0_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_1_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_2_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_3_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_4_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_5_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_6_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_7_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_8_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_9_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_10_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_11_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_12_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_13_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_14_Handler) +DEFAULT_IRQ_HANDLER(GPIO1_15_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_0_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_1_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_2_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_3_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_4_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_5_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_6_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_7_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_8_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_9_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_10_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_11_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_12_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_13_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_14_Handler) +DEFAULT_IRQ_HANDLER(GPIO2_15_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_0_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_1_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_2_Handler) +DEFAULT_IRQ_HANDLER(GPIO3_3_Handler) +DEFAULT_IRQ_HANDLER(UARTRX5_Handler) +DEFAULT_IRQ_HANDLER(UARTTX5_Handler) 
+DEFAULT_IRQ_HANDLER(UART5_Handler) + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#endif + +extern const VECTOR_TABLE_Type __VECTOR_TABLE[]; + const VECTOR_TABLE_Type __VECTOR_TABLE[] __VECTOR_TABLE_ATTRIBUTE = { + (VECTOR_TABLE_Type)(&__INITIAL_SP), /* Initial Stack Pointer */ + Reset_Handler, /* Reset Handler */ + NMI_Handler, /* -14: NMI Handler */ + HardFault_Handler, /* -13: Hard Fault Handler */ + MemManage_Handler, /* -12: MPU Fault Handler */ + BusFault_Handler, /* -11: Bus Fault Handler */ + UsageFault_Handler, /* -10: Usage Fault Handler */ + SecureFault_Handler, /* -9: Secure Fault Handler */ + 0, /* Reserved */ + 0, /* Reserved */ + 0, /* Reserved */ + SVC_Handler, /* -5: SVCall Handler */ + DebugMon_Handler, /* -4: Debug Monitor Handler */ + 0, /* Reserved */ + PendSV_Handler, /* -2: PendSV Handler */ + SysTick_Handler, /* -1: SysTick Handler */ + + NONSEC_WATCHDOG_RESET_REQ_Handler, /* 0: Non-Secure Watchdog Reset Request Handler */ + NONSEC_WATCHDOG_Handler, /* 1: Non-Secure Watchdog Handler */ + SLOWCLK_Timer_Handler, /* 2: SLOWCLK Timer Handler */ + TFM_TIMER0_IRQ_Handler, /* 3: TIMER 0 Handler */ + TIMER1_Handler, /* 4: TIMER 1 Handler */ + TIMER2_Handler, /* 5: TIMER 2 Handler */ + 0, /* 6: Reserved */ + 0, /* 7: Reserved */ + 0, /* 8: Reserved */ + MPC_Handler, /* 9: MPC Combined (Secure) Handler */ + PPC_Handler, /* 10: PPC Combined (Secure) Handler */ + MSC_Handler, /* 11: MSC Combined (Secure) Handler */ + BRIDGE_ERROR_Handler, /* 12: Bridge Error (Secure) Handler */ + 0, /* 13: Reserved */ + MGMT_PPU_Handler, /* 14: MGMT PPU Handler */ + SYS_PPU_Handler, /* 15: SYS PPU Handler */ + CPU0_PPU_Handler, /* 16: CPU0 PPU Handler */ + 0, /* 17: Reserved */ + 0, /* 18: Reserved */ + 0, /* 19: 
Reserved */ + 0, /* 20: Reserved */ + 0, /* 21: Reserved */ + 0, /* 22: Reserved */ + 0, /* 23: Reserved */ + 0, /* 24: Reserved */ + 0, /* 25: Reserved */ + DEBUG_PPU_Handler, /* 26: DEBUG PPU Handler */ + TIMER3_AON_Handler, /* 27: TIMER 3 AON Handler */ + CPU0_CTI_0_Handler, /* 28: CPU0 CTI IRQ 0 Handler */ + CPU0_CTI_1_Handler, /* 29: CPU0 CTI IRQ 1 Handler */ + 0, /* 30: Reserved */ + 0, /* 31: Reserved */ + + /* External interrupts */ + System_Timestamp_Counter_Handler, /* 32: System timestamp counter Handler */ + UARTRX0_Handler, /* 33: UART 0 RX Handler */ + UARTTX0_Handler, /* 34: UART 0 TX Handler */ + UARTRX1_Handler, /* 35: UART 1 RX Handler */ + UARTTX1_Handler, /* 36: UART 1 TX Handler */ + UARTRX2_Handler, /* 37: UART 2 RX Handler */ + UARTTX2_Handler, /* 38: UART 2 TX Handler */ + UARTRX3_Handler, /* 39: UART 3 RX Handler */ + UARTTX3_Handler, /* 40: UART 3 TX Handler */ + UARTRX4_Handler, /* 41: UART 4 RX Handler */ + UARTTX4_Handler, /* 42: UART 4 TX Handler */ + UART0_Combined_Handler, /* 43: UART 0 Combined Handler */ + UART1_Combined_Handler, /* 44: UART 1 Combined Handler */ + UART2_Combined_Handler, /* 45: UART 2 Combined Handler */ + UART3_Combined_Handler, /* 46: UART 3 Combined Handler */ + UART4_Combined_Handler, /* 47: UART 4 Combined Handler */ + UARTOVF_Handler, /* 48: UART 0, 1, 2, 3, 4 & 5 Overflow Handler */ + ETHERNET_Handler, /* 49: Ethernet Handler */ + I2S_Handler, /* 50: Audio I2S Handler */ + TOUCH_SCREEN_Handler, /* 51: Touch Screen Handler */ + USB_Handler, /* 52: USB Handler */ + SPI_ADC_Handler, /* 53: SPI ADC Handler */ + SPI_SHIELD0_Handler, /* 54: SPI (Shield 0) Handler */ + SPI_SHIELD1_Handler, /* 55: SPI (Shield 0) Handler */ + ETHOS_U55_Handler, /* 56: Ethos-U55 Handler */ +#ifdef CORSTONE300_AN547 + 0, /* 57: Reserved */ + 0, /* 58: Reserved */ + 0, /* 59: Reserved */ + DMA_Ch_1_Error_Handler, /* 60: DMA Ch1 Error Handler */ + DMA_Ch_1_Terminal_Count_Handler, /* 61: DMA Ch1 Terminal Count Handler */ + 
DMA_Ch_1_Combined_Handler, /* 62: DMA Ch1 Combined Handler */ + DMA_Ch_2_Error_Handler, /* 63: DMA Ch2 Error Handler */ + DMA_Ch_2_Terminal_Count_Handler, /* 64: DMA Ch2 Terminal Count Handler */ + DMA_Ch_2_Combined_Handler, /* 65: DMA Ch2 Combined Handler */ + DMA_Ch_3_Error_Handler, /* 66: DMA Ch3 Error Handler */ + DMA_Ch_3_Terminal_Count_Handler, /* 67: DMA Ch3 Terminal Count Handler */ + DMA_Ch_3_Combined_Handler, /* 68: DMA Ch3 Combined Handler */ +#else + 0, /* 57: Reserved */ + 0, /* 58: Reserved */ + 0, /* 59: Reserved */ + 0, /* 60: Reserved */ + 0, /* 61: Reserved */ + 0, /* 62: Reserved */ + 0, /* 63: Reserved */ + 0, /* 64: Reserved */ + 0, /* 65: Reserved */ + 0, /* 66: Reserved */ + 0, /* 67: Reserved */ + 0, /* 68: Reserved */ +#endif + GPIO0_Combined_Handler, /* 69: GPIO 0 Combined Handler */ + GPIO1_Combined_Handler, /* 70: GPIO 1 Combined Handler */ + GPIO2_Combined_Handler, /* 71: GPIO 2 Combined Handler */ + GPIO3_Combined_Handler, /* 72: GPIO 3 Combined Handler */ + GPIO0_0_Handler, /* 73: GPIO0 Pin 0 Handler */ + GPIO0_1_Handler, /* 74: GPIO0 Pin 1 Handler */ + GPIO0_2_Handler, /* 75: GPIO0 Pin 2 Handler */ + GPIO0_3_Handler, /* 76: GPIO0 Pin 3 Handler */ + GPIO0_4_Handler, /* 77: GPIO0 Pin 4 Handler */ + GPIO0_5_Handler, /* 78: GPIO0 Pin 5 Handler */ + GPIO0_6_Handler, /* 79: GPIO0 Pin 6 Handler */ + GPIO0_7_Handler, /* 80: GPIO0 Pin 7 Handler */ + GPIO0_8_Handler, /* 81: GPIO0 Pin 8 Handler */ + GPIO0_9_Handler, /* 82: GPIO0 Pin 9 Handler */ + GPIO0_10_Handler, /* 83: GPIO0 Pin 10 Handler */ + GPIO0_11_Handler, /* 84: GPIO0 Pin 11 Handler */ + GPIO0_12_Handler, /* 85: GPIO0 Pin 12 Handler */ + GPIO0_13_Handler, /* 86: GPIO0 Pin 13 Handler */ + GPIO0_14_Handler, /* 87: GPIO0 Pin 14 Handler */ + GPIO0_15_Handler, /* 88: GPIO0 Pin 15 Handler */ + GPIO1_0_Handler, /* 89: GPIO1 Pin 0 Handler */ + GPIO1_1_Handler, /* 90: GPIO1 Pin 1 Handler */ + GPIO1_2_Handler, /* 91: GPIO1 Pin 2 Handler */ + GPIO1_3_Handler, /* 92: GPIO1 Pin 3 Handler */ + 
GPIO1_4_Handler, /* 93: GPIO1 Pin 4 Handler */ + GPIO1_5_Handler, /* 94: GPIO1 Pin 5 Handler */ + GPIO1_6_Handler, /* 95: GPIO1 Pin 6 Handler */ + GPIO1_7_Handler, /* 96: GPIO1 Pin 7 Handler */ + GPIO1_8_Handler, /* 97: GPIO1 Pin 8 Handler */ + GPIO1_9_Handler, /* 98: GPIO1 Pin 9 Handler */ + GPIO1_10_Handler, /* 99: GPIO1 Pin 10 Handler */ + GPIO1_11_Handler, /* 100: GPIO1 Pin 11 Handler */ + GPIO1_12_Handler, /* 101: GPIO1 Pin 12 Handler */ + GPIO1_13_Handler, /* 102: GPIO1 Pin 13 Handler */ + GPIO1_14_Handler, /* 103: GPIO1 Pin 14 Handler */ + GPIO1_15_Handler, /* 104: GPIO1 Pin 15 Handler */ + GPIO2_0_Handler, /* 105: GPIO2 Pin 0 Handler */ + GPIO2_1_Handler, /* 106: GPIO2 Pin 1 Handler */ + GPIO2_2_Handler, /* 107: GPIO2 Pin 2 Handler */ + GPIO2_3_Handler, /* 108: GPIO2 Pin 3 Handler */ + GPIO2_4_Handler, /* 109: GPIO2 Pin 4 Handler */ + GPIO2_5_Handler, /* 110: GPIO2 Pin 5 Handler */ + GPIO2_6_Handler, /* 111: GPIO2 Pin 6 Handler */ + GPIO2_7_Handler, /* 112: GPIO2 Pin 7 Handler */ + GPIO2_8_Handler, /* 113: GPIO2 Pin 8 Handler */ + GPIO2_9_Handler, /* 114: GPIO2 Pin 9 Handler */ + GPIO2_10_Handler, /* 115: GPIO2 Pin 10 Handler */ + GPIO2_11_Handler, /* 116: GPIO2 Pin 11 Handler */ + GPIO2_12_Handler, /* 117: GPIO2 Pin 12 Handler */ + GPIO2_13_Handler, /* 118: GPIO2 Pin 13 Handler */ + GPIO2_14_Handler, /* 119: GPIO2 Pin 14 Handler */ + GPIO2_15_Handler, /* 120: GPIO2 Pin 15 Handler */ + GPIO3_0_Handler, /* 121: GPIO3 Pin 0 Handler */ + GPIO3_1_Handler, /* 122: GPIO3 Pin 1 Handler */ + GPIO3_2_Handler, /* 123: GPIO3 Pin 2 Handler */ + GPIO3_3_Handler, /* 124: GPIO3 Pin 3 Handler */ + UARTRX5_Handler, /* 125: UART 5 RX Interrupt */ + UARTTX5_Handler, /* 126: UART 5 TX Interrupt */ + UART5_Handler, /* 127: UART 5 combined Interrupt */ + 0, /* 128: Reserved */ + 0, /* 129: Reserved */ + 0, /* 130: Reserved */ +}; + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic pop +#endif + +/*---------------------------------------------------------------------------- + 
Reset Handler called on controller reset + *----------------------------------------------------------------------------*/ +void Reset_Handler(void) +{ + __set_PSP((uint32_t)(&__INITIAL_SP)); + + __set_MSPLIM((uint32_t)(&__STACK_LIMIT)); + __set_PSPLIM((uint32_t)(&__STACK_LIMIT)); + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + __TZ_set_STACKSEAL_S((uint32_t *)(&__STACK_SEAL)); +#endif + + SystemInit(); /* CMSIS System Initialization */ + __PROGRAM_START(); /* Enter PreMain (C library entry point) */ +} diff --git a/dsppp/RTE/Device/SSE-300-MPS3/system_SSE300MPS3.c b/dsppp/RTE/Device/SSE-300-MPS3/system_SSE300MPS3.c new file mode 100644 index 00000000..4e67d536 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/system_SSE300MPS3.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2009-2022 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * This file is derivative of CMSIS V5.9.0 system_ARMCM55.c + * Git SHA: 2b7495b8535bdcb306dac29b9ded4cfb679d7e5c + */ + +#include "SSE300MPS3.h" + +/*---------------------------------------------------------------------------- + Define clocks + *----------------------------------------------------------------------------*/ + #define XTAL (32000000UL) + #define SYSTEM_CLOCK (XTAL) + #define PERIPHERAL_CLOCK (25000000UL) + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ +extern const VECTOR_TABLE_Type __VECTOR_TABLE[496]; + +/*---------------------------------------------------------------------------- + System Core Clock Variable + *----------------------------------------------------------------------------*/ +uint32_t SystemCoreClock = SYSTEM_CLOCK; +uint32_t PeripheralClock = PERIPHERAL_CLOCK; + +/*---------------------------------------------------------------------------- + System Core Clock update function + *----------------------------------------------------------------------------*/ +void SystemCoreClockUpdate (void) +{ + SystemCoreClock = SYSTEM_CLOCK; + PeripheralClock = PERIPHERAL_CLOCK; +} + +/*---------------------------------------------------------------------------- + System initialization function + *----------------------------------------------------------------------------*/ +void SystemInit (void) +{ +#if defined (__VTOR_PRESENT) && (__VTOR_PRESENT == 1U) + SCB->VTOR = (uint32_t)(&__VECTOR_TABLE[0]); +#endif + +#if (defined (__FPU_USED) && (__FPU_USED == 1U)) || \ + (defined (__ARM_FEATURE_MVE) && (__ARM_FEATURE_MVE > 0U)) + SCB->CPACR |= ((3U << 10U*2U) | /* enable CP10 Full Access */ + (3U << 11U*2U) ); /* enable CP11 Full Access */ + + /* Set low-power state for PDEPU */ + /* 0b00 | ON, PDEPU is not in low-power state */ + /* 0b01 | ON, but the clock is off */ + /* 0b10 | 
RET(ention) */ + /* 0b11 | OFF */ + + /* Clear ELPSTATE, value is 0b11 on Cold reset */ + PWRMODCTL->CPDLPSTATE &= ~(PWRMODCTL_CPDLPSTATE_ELPSTATE_Msk); + + /* Favor best FP/MVE performance by default, avoid EPU switch-ON delays */ + /* PDEPU ON, Clock OFF */ + PWRMODCTL->CPDLPSTATE |= 0x1 << PWRMODCTL_CPDLPSTATE_ELPSTATE_Pos; +#endif + +#ifdef UNALIGNED_SUPPORT_DISABLE + SCB->CCR |= SCB_CCR_UNALIGN_TRP_Msk; +#endif + + /* Enable Loop and branch info cache */ + SCB->CCR |= SCB_CCR_LOB_Msk; + __DSB(); + __ISB(); + + + SystemCoreClock = SYSTEM_CLOCK; + PeripheralClock = PERIPHERAL_CLOCK; +} diff --git a/dsppp/RTE/Device/SSE-300-MPS3/system_SSE300MPS3.c.base@1.1.1 b/dsppp/RTE/Device/SSE-300-MPS3/system_SSE300MPS3.c.base@1.1.1 new file mode 100644 index 00000000..4e67d536 --- /dev/null +++ b/dsppp/RTE/Device/SSE-300-MPS3/system_SSE300MPS3.c.base@1.1.1 @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2009-2022 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * This file is derivative of CMSIS V5.9.0 system_ARMCM55.c + * Git SHA: 2b7495b8535bdcb306dac29b9ded4cfb679d7e5c + */ + +#include "SSE300MPS3.h" + +/*---------------------------------------------------------------------------- + Define clocks + *----------------------------------------------------------------------------*/ + #define XTAL (32000000UL) + #define SYSTEM_CLOCK (XTAL) + #define PERIPHERAL_CLOCK (25000000UL) + +/*---------------------------------------------------------------------------- + Exception / Interrupt Vector table + *----------------------------------------------------------------------------*/ +extern const VECTOR_TABLE_Type __VECTOR_TABLE[496]; + +/*---------------------------------------------------------------------------- + System Core Clock Variable + *----------------------------------------------------------------------------*/ +uint32_t SystemCoreClock = SYSTEM_CLOCK; +uint32_t PeripheralClock = PERIPHERAL_CLOCK; + +/*---------------------------------------------------------------------------- + System Core Clock update function + *----------------------------------------------------------------------------*/ +void SystemCoreClockUpdate (void) +{ + SystemCoreClock = SYSTEM_CLOCK; + PeripheralClock = PERIPHERAL_CLOCK; +} + +/*---------------------------------------------------------------------------- + System initialization function + *----------------------------------------------------------------------------*/ +void SystemInit (void) +{ +#if defined (__VTOR_PRESENT) && (__VTOR_PRESENT == 1U) + SCB->VTOR = (uint32_t)(&__VECTOR_TABLE[0]); +#endif + +#if (defined (__FPU_USED) && (__FPU_USED == 1U)) || \ + (defined (__ARM_FEATURE_MVE) && (__ARM_FEATURE_MVE > 0U)) + SCB->CPACR |= ((3U << 10U*2U) | /* enable CP10 Full Access */ + (3U << 11U*2U) ); /* enable CP11 Full Access */ + + /* Set low-power state for PDEPU */ + /* 0b00 | ON, PDEPU is not in low-power state */ + /* 0b01 | ON, but the clock is off */ + /* 0b10 | 
RET(ention) */ + /* 0b11 | OFF */ + + /* Clear ELPSTATE, value is 0b11 on Cold reset */ + PWRMODCTL->CPDLPSTATE &= ~(PWRMODCTL_CPDLPSTATE_ELPSTATE_Msk); + + /* Favor best FP/MVE performance by default, avoid EPU switch-ON delays */ + /* PDEPU ON, Clock OFF */ + PWRMODCTL->CPDLPSTATE |= 0x1 << PWRMODCTL_CPDLPSTATE_ELPSTATE_Pos; +#endif + +#ifdef UNALIGNED_SUPPORT_DISABLE + SCB->CCR |= SCB_CCR_UNALIGN_TRP_Msk; +#endif + + /* Enable Loop and branch info cache */ + SCB->CCR |= SCB_CCR_LOB_Msk; + __DSB(); + __ISB(); + + + SystemCoreClock = SYSTEM_CLOCK; + PeripheralClock = PERIPHERAL_CLOCK; +} diff --git a/dsppp/RTE/Device/SSE_300_MPS3/ac6_linker_script.sct b/dsppp/RTE/Device/SSE_300_MPS3/ac6_linker_script.sct new file mode 100644 index 00000000..4d6e579d --- /dev/null +++ b/dsppp/RTE/Device/SSE_300_MPS3/ac6_linker_script.sct @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/*---------------------------------------------------------------------------- + Scatter File Definitions definition + *----------------------------------------------------------------------------*/ + +LR_ROM0 __ROM0_BASE __ROM0_SIZE { + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + ER_CMSE_VENEER __ROM0_BASE+__ROM0_SIZE -__ROM0_SIZE { + *(Veneer$$CMSE) + } + #define ER_CMSE_VENEER_SIZE AlignExpr(ImageLength(ER_CMSE_VENEER), 8) +#else + #define ER_CMSE_VENEER_SIZE 0 +#endif + + ER_ROM0 __ROM0_BASE (__ROM0_SIZE - ER_CMSE_VENEER_SIZE) { + *.o (RESET, +First) + *(InRoot$$Sections) + *(+RO +XO) + } + + RW_NOINIT __RAM0_BASE UNINIT (__RAM0_SIZE - __HEAP_SIZE - __STACK_SIZE) { + *(.bss.noinit) + } + + RW_RAM0 AlignExpr(+0, 8) (__RAM0_SIZE - __HEAP_SIZE - __STACK_SIZE - AlignExpr(ImageLength(RW_NOINIT), 8)) { + *(+RW +ZI) + } + +#if __HEAP_SIZE > 0 + ARM_LIB_HEAP (AlignExpr(+0, 8)) EMPTY __HEAP_SIZE { ; Reserve empty region for heap + } +#endif + + ARM_LIB_STACK (__RAM0_BASE + __RAM0_SIZE - __STACKSEAL_SIZE) EMPTY -__STACK_SIZE { ; Reserve empty region for stack + } + +#if __STACKSEAL_SIZE > 0 + STACKSEAL +0 EMPTY 8 { ; Reserve empty region for stack seal immediately after stack + } +#endif + +#if __RAM1_SIZE > 0 + RW_RAM1 __RAM1_BASE __RAM1_SIZE { + .ANY (+RW +ZI) + } +#endif + +#if __RAM2_SIZE > 0 + RW_RAM2 __RAM2_BASE __RAM2_SIZE { + .ANY (+RW +ZI) + } +#endif + +#if __RAM3_SIZE > 0 + RW_RAM3 __RAM3_BASE __RAM3_SIZE { + .ANY (+RW +ZI) + } +#endif +} + +#if __ROM1_SIZE > 0 +LR_ROM1 __ROM1_BASE __ROM1_SIZE { + ER_ROM1 +0 __ROM1_SIZE { + .ANY (+RO +XO) + } +} +#endif + +#if __ROM2_SIZE > 0 +LR_ROM2 __ROM2_BASE 
__ROM2_SIZE { + ER_ROM2 +0 __ROM2_SIZE { + .ANY (+RO +XO) + } +} +#endif + +#if __ROM3_SIZE > 0 +LR_ROM3 __ROM3_BASE __ROM3_SIZE { + ER_ROM3 +0 __ROM3_SIZE { + .ANY (+RO +XO) + } +} +#endif diff --git a/dsppp/RTE/Device/SSE_300_MPS3/clang_linker_script.ld b/dsppp/RTE/Device/SSE_300_MPS3/clang_linker_script.ld new file mode 100644 index 00000000..40f955c1 --- /dev/null +++ b/dsppp/RTE/Device/SSE_300_MPS3/clang_linker_script.ld @@ -0,0 +1,353 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright © 2019 Keith Packard + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx!w) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx!w) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx!w) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx!w) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (w!rx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (w!rx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (w!rx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (w!rx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +ENTRY(Reset_Handler) + +PHDRS +{ + text PT_LOAD; + ram PT_LOAD; + ram_init PT_LOAD; + tls PT_TLS; +} + +SECTIONS +{ + .init : { + KEEP (*(.vectors)) + KEEP (*(.text.init.enter)) + KEEP (*(.data.init.enter)) + KEEP (*(SORT_BY_NAME(.init) SORT_BY_NAME(.init.*))) 
+ } >ROM0 AT>ROM0 :text + + .text : { + + /* code */ + *(.text.unlikely .text.unlikely.*) + *(.text.startup .text.startup.*) + *(.text .text.* .opd .opd.*) + *(.gnu.linkonce.t.*) + KEEP (*(.fini .fini.*)) + __text_end = .; + + PROVIDE (__etext = __text_end); + PROVIDE (_etext = __text_end); + PROVIDE (etext = __text_end); + + /* read-only data */ + *(.rdata) + *(.rodata .rodata.*) + *(.gnu.linkonce.r.*) + + *(.srodata.cst16) + *(.srodata.cst8) + *(.srodata.cst4) + *(.srodata.cst2) + *(.srodata .srodata.*) + *(.data.rel.ro .data.rel.ro.*) + *(.got .got.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + /* lists of constructors and destructors */ + PROVIDE_HIDDEN ( __preinit_array_start = . ); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN ( __preinit_array_end = . ); + + PROVIDE_HIDDEN ( __init_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) + KEEP (*(.init_array .ctors)) + PROVIDE_HIDDEN ( __init_array_end = . ); + + PROVIDE_HIDDEN ( __fini_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) + KEEP (*(.fini_array .dtors)) + PROVIDE_HIDDEN ( __fini_array_end = . ); + + } >ROM0 AT>ROM0 :text + + .toc : { + *(.toc .toc.*) + } >ROM0 AT>ROM0 :text + + /* additional sections when compiling with C++ exception support */ + + .except_ordered : { + *(.gcc_except_table *.gcc_except_table.*) + KEEP (*(.eh_frame .eh_frame.*)) + *(.ARM.extab* .gnu.linkonce.armextab.*) + } >ROM0 AT>ROM0 :text + + .except_unordered : { + . 
= ALIGN(8); + + PROVIDE(__exidx_start = .); + *(.ARM.exidx*) + PROVIDE(__exidx_end = .); + } >ROM0 AT>ROM0 :text + + + /* + * Data values which are preserved across reset + */ + .preserve (NOLOAD) : { + PROVIDE(__preserve_start__ = .); + KEEP(*(SORT_BY_NAME(.preserve.*))) + KEEP(*(.preserve)) + PROVIDE(__preserve_end__ = .); + } >RAM0 AT>RAM0 :ram + + .data : { + *(.data .data.*) + *(.gnu.linkonce.d.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + PROVIDE( __global_pointer$ = . + 0x800 ); + *(.sdata .sdata.* .sdata2.*) + *(.gnu.linkonce.s.*) + } >RAM0 AT>ROM0 :ram_init + PROVIDE(__data_start = ADDR(.data)); + PROVIDE(__data_source = LOADADDR(.data)); + + /* Thread local initialized data. This gets + * space allocated as it is expected to be placed + * in ram to be used as a template for TLS data blocks + * allocated at runtime. We're slightly abusing that + * by placing the data in flash where it will be copied + * into the allocate ram addresses by the existing + * data initialization code in crt0 + */ + .tdata : { + *(.tdata .tdata.* .gnu.linkonce.td.*) + PROVIDE(__data_end = .); + PROVIDE(__tdata_end = .); + } >RAM0 AT>ROM0 :tls :ram_init + PROVIDE( __tls_base = ADDR(.tdata)); + PROVIDE( __tdata_start = ADDR(.tdata)); + PROVIDE( __tdata_source = LOADADDR(.tdata) ); + PROVIDE( __tdata_source_end = LOADADDR(.tdata) + SIZEOF(.tdata) ); + PROVIDE( __data_source_end = __tdata_source_end ); + PROVIDE( __tdata_size = SIZEOF(.tdata) ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata),ALIGNOF(.tbss)) ); + + PROVIDE( __edata = __data_end ); + PROVIDE( _edata = __data_end ); + PROVIDE( edata = __data_end ); + PROVIDE( __data_size = __data_end - __data_start ); + PROVIDE( __data_source_size = __data_source_end - __data_source ); + + .tbss (NOLOAD) : { + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + PROVIDE( __tls_end = . ); + PROVIDE( __tbss_end = . 
); + } >RAM0 AT>RAM0 :tls :ram + PROVIDE( __bss_start = ADDR(.tbss)); + PROVIDE( __tbss_start = ADDR(.tbss)); + PROVIDE( __tbss_offset = ADDR(.tbss) - ADDR(.tdata) ); + PROVIDE( __tbss_size = SIZEOF(.tbss) ); + PROVIDE( __tls_size = __tls_end - __tls_base ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + PROVIDE( __arm64_tls_tcb_offset = MAX(16, __tls_align) ); + + /* + * The linker special cases .tbss segments which are + * identified as segments which are not loaded and are + * thread_local. + * + * For these segments, the linker does not advance 'dot' + * across them. We actually need memory allocated for tbss, + * so we create a special segment here just to make room + */ + /* + .tbss_space (NOLOAD) : { + . = ADDR(.tbss); + . = . + SIZEOF(.tbss); + } >RAM0 AT>RAM0 :ram + */ + + .bss (NOLOAD) : { + *(.sbss*) + *(.gnu.linkonce.sb.*) + *(.bss .bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + + /* Align the heap */ + . = ALIGN(8); + __bss_end = .; + } >RAM0 AT>RAM0 :ram + PROVIDE( __non_tls_bss_start = ADDR(.bss) ); + PROVIDE( __end = __bss_end ); + PROVIDE( _end = __bss_end ); + PROVIDE( end = __bss_end ); + PROVIDE( __bss_size = __bss_end - __bss_start ); + + /* Make the rest of memory available for heap storage */ + PROVIDE (__heap_start = __end); +#ifdef __HEAP_SIZE + PROVIDE (__heap_end = __heap_start + __HEAP_SIZE); + PROVIDE (__heap_size = __HEAP_SIZE); +#else + PROVIDE (__heap_end = __stack - __STACK_SIZE); + PROVIDE (__heap_size = __heap_end - __heap_start); +#endif + .heap (NOLOAD) : { + . += __heap_size; + } >RAM0 :ram + + /* Define a stack region to make sure it fits in memory */ + PROVIDE(__stack = ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE); + PROVIDE(__stack_limit = __stack - __STACK_SIZE); + .stack (__stack_limit) (NOLOAD) : { + . += __STACK_SIZE; + } >RAM0 :ram + +#if __STACKSEAL_SIZE > 0 + PROVIDE(__stack_seal = __stack) + .stackseal (__stack) (NOLOAD) : + { + . 
+= __STACKSEAL_SIZE; + } >RAM0 :ram +#endif + + /* Throw away C++ exception handling information */ + + /* + + /DISCARD/ : { + *(.note .note.*) + *(.eh_frame .eh_frame.*) + *(.ARM.extab* .gnu.linkonce.armextab.*) + *(.ARM.exidx*) + } + + */ + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1. */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions. */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2. */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2. */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions. */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3. */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + /* DWARF 5. 
*/ + .debug_addr 0 : { *(.debug_addr) } + .debug_line_str 0 : { *(.debug_line_str) } + .debug_loclists 0 : { *(.debug_loclists) } + .debug_macro 0 : { *(.debug_macro) } + .debug_names 0 : { *(.debug_names) } + .debug_rnglists 0 : { *(.debug_rnglists) } + .debug_str_offsets 0 : { *(.debug_str_offsets) } + .debug_sup 0 : { *(.debug_sup) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } +} +/* + * Check that sections that are copied from flash to RAM have matching + * padding, so that a single memcpy() of __data_size copies the correct bytes. + */ +ASSERT( __data_size == __data_source_size, + "ERROR: .data/.tdata flash size does not match RAM size"); diff --git a/dsppp/RTE/Device/SSE_300_MPS3/gcc_linker_script.ld b/dsppp/RTE/Device/SSE_300_MPS3/gcc_linker_script.ld new file mode 100644 index 00000000..a018e5d4 --- /dev/null +++ b/dsppp/RTE/Device/SSE_300_MPS3/gcc_linker_script.ld @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (rwx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (rwx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (rwx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (rwx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. 
+ * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > ROM0 + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + .gnu.sgstubs : + { + . = ALIGN(32); + } > ROM0 +#endif + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > ROM0 + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > ROM0 + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > ROM0 + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. 
+ LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > ROM0 + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM0 AT > ROM0 + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM1 AT > ROM0 +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM0 AT > RAM0 + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM1 AT > RAM1 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . 
= ALIGN(8); + __HeapLimit = .; + } > RAM0 + + .stack (ORIGIN(RAM0) + LENGTH(RAM0) - __STACK_SIZE - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM0 + PROVIDE(__stack = __StackTop); + +#if __STACKSEAL_SIZE > 0 + .stackseal (ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackSeal = .; + . = . + 8; + . = ALIGN(8); + } > RAM0 +#endif + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/RTE/Device/SSE_300_MPS3/regions_V2M_MPS3_SSE_300_FVP.h b/dsppp/RTE/Device/SSE_300_MPS3/regions_V2M_MPS3_SSE_300_FVP.h new file mode 100644 index 00000000..cf6b28cc --- /dev/null +++ b/dsppp/RTE/Device/SSE_300_MPS3/regions_V2M_MPS3_SSE_300_FVP.h @@ -0,0 +1,400 @@ +#ifndef REGIONS_V2M_MPS3_SSE_300_FVP_H +#define REGIONS_V2M_MPS3_SSE_300_FVP_H + + +//-------- <<< Use Configuration Wizard in Context Menu >>> -------------------- + +// Device pack: ARM::V2M_MPS3_SSE_300_BSP@1.4.0 +// Device pack used to generate this file + +// ROM Configuration +// ======================= +// + +// RAM Configuration +// ======================= +// IROM1=<__RAM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x10000000 +#define __RAM0_BASE 0x10000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00200000 +#define __RAM0_SIZE 0x00200000 +// Default region +// Enables memory region globally for the application. +#define __RAM0_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM0_NOINIT 0 +// + +// IROM2=<__RAM1> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x00000000 +#define __RAM1_BASE 0x00000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. 
+// Default: 0x00200000 +#define __RAM1_SIZE 0x00200000 +// Default region +// Enables memory region globally for the application. +#define __RAM1_DEFAULT 0 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM1_NOINIT 0 +// + +// IRAM1=<__RAM2> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x30000000 +#define __RAM2_BASE 0x30000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM2_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM2_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM2_NOINIT 0 +// + +// IRAM2=<__RAM3> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x20000000 +#define __RAM3_BASE 0x20000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM3_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM3_DEFAULT 0 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM3_NOINIT 0 +// + +// ITCM_NS=<__RAM4> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x00000000 +#define __RAM4_BASE 0x00000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00080000 +#define __RAM4_SIZE 0x00080000 +// Default region +// Enables memory region globally for the application. +#define __RAM4_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM4_NOINIT 0 +// + +// SRAM_NS=<__RAM5> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x01000000 +#define __RAM5_BASE 0x01000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. 
+// Default: 0x00100000 +#define __RAM5_SIZE 0x00100000 +// Default region +// Enables memory region globally for the application. +#define __RAM5_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM5_NOINIT 0 +// + +// DTCM0_NS=<__RAM6> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x20000000 +#define __RAM6_BASE 0x20000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM6_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM6_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM6_NOINIT 0 +// + +// DTCM1_NS=<__RAM7> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x20020000 +#define __RAM7_BASE 0x20020000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM7_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM7_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM7_NOINIT 0 +// + +// DTCM2_NS=<__RAM8> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x20040000 +#define __RAM8_BASE 0x20040000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM8_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM8_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM8_NOINIT 0 +// + +// DTCM3_NS=<__RAM9> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x20060000 +#define __RAM9_BASE 0x20060000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. 
+// Default: 0x00020000 +#define __RAM9_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM9_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM9_NOINIT 0 +// + +// ISRAM0_NS=<__RAM10> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x21000000 +#define __RAM10_BASE 0x21000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00100000 +#define __RAM10_SIZE 0x00100000 +// Default region +// Enables memory region globally for the application. +#define __RAM10_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM10_NOINIT 0 +// + +// ISRAM1_NS=<__RAM11> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x21100000 +#define __RAM11_BASE 0x21100000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00100000 +#define __RAM11_SIZE 0x00100000 +// Default region +// Enables memory region globally for the application. +#define __RAM11_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM11_NOINIT 0 +// + +// QSPI_SRAM_NS=<__RAM12> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x28000000 +#define __RAM12_BASE 0x28000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00800000 +#define __RAM12_SIZE 0x00800000 +// Default region +// Enables memory region globally for the application. +#define __RAM12_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM12_NOINIT 0 +// + +// ITCM_S=<__RAM13> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x10000000 +#define __RAM13_BASE 0x10000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. 
+// Default: 0x00080000 +#define __RAM13_SIZE 0x00080000 +// Default region +// Enables memory region globally for the application. +#define __RAM13_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM13_NOINIT 0 +// + +// SRAM_S=<__RAM14> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x11000000 +#define __RAM14_BASE 0x11000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00100000 +#define __RAM14_SIZE 0x00100000 +// Default region +// Enables memory region globally for the application. +#define __RAM14_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM14_NOINIT 0 +// + +// DTCM0_S=<__RAM15> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x30000000 +#define __RAM15_BASE 0x30000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM15_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM15_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM15_NOINIT 0 +// + +// DTCM1_S=<__RAM16> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x30020000 +#define __RAM16_BASE 0x30020000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM16_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM16_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM16_NOINIT 0 +// + +// DTCM2_S=<__RAM17> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x30040000 +#define __RAM17_BASE 0x30040000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. 
+// Default: 0x00020000 +#define __RAM17_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM17_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM17_NOINIT 0 +// + +// DTCM3_S=<__RAM18> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x30060000 +#define __RAM18_BASE 0x30060000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM18_SIZE 0x00020000 +// Default region +// Enables memory region globally for the application. +#define __RAM18_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM18_NOINIT 0 +// + +// ISRAM0_S=<__RAM19> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x31000000 +#define __RAM19_BASE 0x31000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00100000 +#define __RAM19_SIZE 0x00100000 +// Default region +// Enables memory region globally for the application. +#define __RAM19_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM19_NOINIT 0 +// + +// ISRAM1_S=<__RAM20> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x31100000 +#define __RAM20_BASE 0x31100000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00100000 +#define __RAM20_SIZE 0x00100000 +// Default region +// Enables memory region globally for the application. +#define __RAM20_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM20_NOINIT 0 +// + +// QSPI_SRAM_S=<__RAM21> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x38000000 +#define __RAM21_BASE 0x38000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. 
+// Default: 0x00800000 +#define __RAM21_SIZE 0x00800000 +// Default region +// Enables memory region globally for the application. +#define __RAM21_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM21_NOINIT 0 +// + +// + +// Stack / Heap Configuration +// Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +// Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +#define __STACK_SIZE 0x00000200 +#define __HEAP_SIZE 0x00038000 +// + + +#endif /* REGIONS_V2M_MPS3_SSE_300_FVP_H */ diff --git a/dsppp/RTE/_Release_IPSS_M0P/RTE_Components.h b/dsppp/RTE/_Release_IPSS_M0P/RTE_Components.h new file mode 100644 index 00000000..332d6e45 --- /dev/null +++ b/dsppp/RTE/_Release_IPSS_M0P/RTE_Components.h @@ -0,0 +1,20 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! + * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+IPSS_M0P' + * Target: 'Release+IPSS_M0P' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "ARMCM0plus.h" + + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/RTE/_Release_IPSS_M4/RTE_Components.h b/dsppp/RTE/_Release_IPSS_M4/RTE_Components.h new file mode 100644 index 00000000..747232d2 --- /dev/null +++ b/dsppp/RTE/_Release_IPSS_M4/RTE_Components.h @@ -0,0 +1,20 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! + * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+IPSS_M4' + * Target: 'Release+IPSS_M4' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "ARMCM4.h" + + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/RTE/_Release_LLVM-Corstone-300/RTE_Components.h b/dsppp/RTE/_Release_LLVM-Corstone-300/RTE_Components.h new file mode 100644 index 00000000..cd99d204 --- /dev/null +++ b/dsppp/RTE/_Release_LLVM-Corstone-300/RTE_Components.h @@ -0,0 +1,23 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! 
+ * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+LLVM-Corstone-300' + * Target: 'Release+LLVM-Corstone-300' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "SSE300MPS3.h" + +/* ARM::CMSIS-Compiler:STDOUT:Custom@1.0.0 */ +#define RTE_CMSIS_Compiler_STDOUT /* CMSIS-Compiler STDOUT */ + #define RTE_CMSIS_Compiler_STDOUT_Custom /* CMSIS-Compiler STDOUT: Custom */ + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/RTE/_Release_MPS3-Corstone-300/RTE_Components.h b/dsppp/RTE/_Release_MPS3-Corstone-300/RTE_Components.h new file mode 100644 index 00000000..d7da60e1 --- /dev/null +++ b/dsppp/RTE/_Release_MPS3-Corstone-300/RTE_Components.h @@ -0,0 +1,25 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! + * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+MPS3-Corstone-300' + * Target: 'Release+MPS3-Corstone-300' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "SSE300MPS3.h" + +/* ARM::CMSIS Driver:USART@1.0.0 */ +#define RTE_Drivers_USART +/* ARM::CMSIS-Compiler:STDOUT:Custom@1.0.0 */ +#define RTE_CMSIS_Compiler_STDOUT /* CMSIS-Compiler STDOUT */ + #define RTE_CMSIS_Compiler_STDOUT_Custom /* CMSIS-Compiler STDOUT: Custom */ + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/RTE/_Release_VHT-Corstone-300/RTE_Components.h b/dsppp/RTE/_Release_VHT-Corstone-300/RTE_Components.h new file mode 100644 index 00000000..c326941f --- /dev/null +++ b/dsppp/RTE/_Release_VHT-Corstone-300/RTE_Components.h @@ -0,0 +1,20 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! 
+ * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+VHT-Corstone-300' + * Target: 'Release+VHT-Corstone-300' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "SSE300MPS3.h" + + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/RTE/_Release_VHT-M0P/RTE_Components.h b/dsppp/RTE/_Release_VHT-M0P/RTE_Components.h new file mode 100644 index 00000000..8a0db96b --- /dev/null +++ b/dsppp/RTE/_Release_VHT-M0P/RTE_Components.h @@ -0,0 +1,20 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! + * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+VHT-M0P' + * Target: 'Release+VHT-M0P' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "ARMCM0plus.h" + + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/RTE/_Release_VHT-M4/RTE_Components.h b/dsppp/RTE/_Release_VHT-M4/RTE_Components.h new file mode 100644 index 00000000..4c34863c --- /dev/null +++ b/dsppp/RTE/_Release_VHT-M4/RTE_Components.h @@ -0,0 +1,20 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! + * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+VHT-M4' + * Target: 'Release+VHT-M4' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "ARMCM4.h" + + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/RTE/_Release_VHT_M0P/RTE_Components.h b/dsppp/RTE/_Release_VHT_M0P/RTE_Components.h new file mode 100644 index 00000000..768bae44 --- /dev/null +++ b/dsppp/RTE/_Release_VHT_M0P/RTE_Components.h @@ -0,0 +1,20 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! 
+ * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+VHT_M0P' + * Target: 'Release+VHT_M0P' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "ARMCM0plus.h" + + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/RTE/_Release_VHT_M4/RTE_Components.h b/dsppp/RTE/_Release_VHT_M4/RTE_Components.h new file mode 100644 index 00000000..44e1e938 --- /dev/null +++ b/dsppp/RTE/_Release_VHT_M4/RTE_Components.h @@ -0,0 +1,20 @@ +/* + * CSOLUTION generated file: DO NOT EDIT! + * Generated by: csolution version 2.2.1 + * + * Project: 'test.Release+VHT_M4' + * Target: 'Release+VHT_M4' + */ + +#ifndef RTE_COMPONENTS_H +#define RTE_COMPONENTS_H + + +/* + * Define the Device Header File: + */ +#define CMSIS_device_header "ARMCM4.h" + + + +#endif /* RTE_COMPONENTS_H */ diff --git a/dsppp/allocator.cpp b/dsppp/allocator.cpp new file mode 100644 index 00000000..aaf61cc5 --- /dev/null +++ b/dsppp/allocator.cpp @@ -0,0 +1,98 @@ +#include "allocator.h" + +#define ALLOC_POOL(BYTES,NB) \ +MemoryPool vecPool_##BYTES(NB); + +#if defined(POOL_ALLOCATOR) +#include "allocation/all.cpp" +#endif + +std::map current_stats; +std::map max_stats; +std::map current_dyn_stats; + +void print_map(std::string comment) +{ + + std::cout << comment << "\r\n"; +#if !defined(POOL_ALLOCATOR) + std::size_t total_static=0; + std::size_t total_dynamic=0; + + for (const auto v : max_stats) + { + // Only count allocations with size known at build time + if (v.first > 0) + { + std::cout << "ALLOC_POOL(" << v.first << "," << v.second << "); \r\n"; + total_static += v.first * v.second; + } + } + + for (const auto v : max_stats) + { + // Only count allocations with size known at build time + if (v.first > 0) + { + std::cout << "POOL(" << v.first << "); \r\n"; + } + } + + std::cout << "\r\n"; + + std::cout << "Total static bytes: " << total_static << std::hex << " (0x" << total_static << ")\r\n"; + + total_dynamic = 
0; + std::cout << "\r\nDynamic allocations\r\n"; + for (const auto v : max_stats) + { + // Only count allocations with size known at build time + if (v.first < 0) + { + // Count is meaningless for dynamic allocation + // since we can track the destroy (destroy has no length + // argument contrary to allocate and so can only get + // the length from the static value). + std::cout << std::dec << -v.first << " : " << v.second << "\r\n"; + total_dynamic += (-v.first) * v.second; + } + } + std::cout << "Total dynamic bytes: " << total_dynamic << std::hex << " (0x" << total_dynamic << ")\r\n"; + std::cout << "Total bytes: " << (total_static+total_dynamic) << std::hex << " (0x" << (total_static+total_dynamic) << ")\r\n"; + + +#endif +} + +void reset_current_stats() +{ +#if !defined(POOL_ALLOCATOR) + for (auto v : current_stats) + { + v.second = 0; + } +#endif +} + +void check_current_stats() +{ +#if !defined(POOL_ALLOCATOR) + for (const auto v : current_stats) + { + if (v.second > 0) + { + if (v.first>0) + { + std::cout << "Error memory pool " << v.first << " not empty = " << v.second << "\r\n"; + } + else + { + std::cout << "Error dynamic alloc " << -v.first << " not empty = " << v.second << "\r\n"; + } + } + } + + reset_current_stats(); +#endif +} + diff --git a/dsppp/allocator.h b/dsppp/allocator.h new file mode 100644 index 00000000..61e95006 --- /dev/null +++ b/dsppp/allocator.h @@ -0,0 +1,124 @@ +#pragma once + +#include +#include +#include +#include +#include "test_config.h" + + +// Allocator for temporaries +#if defined(POOL_ALLOCATOR) +#define TMP_ALLOC pool_allocator +#else +#define TMP_ALLOC stat_allocator +#endif + +#include + + +using namespace arm_cmsis_dsp; + + +constexpr int NBVEC_2 = 2; +constexpr int NBVEC_3 = 3; +constexpr int NBVEC_4 = 4; +constexpr int NBVEC_8 = 8; +constexpr int NBVEC_9 = 9; +constexpr int NBVEC_16 = 16; +constexpr int NBVEC_32 = 32; +constexpr int NBVEC_44 = 44; +constexpr int NBVEC_47 = 47; +constexpr int NBVEC_64 = 64; +constexpr 
int NBVEC_128 = 128; +constexpr int NBVEC_256 = 256; +constexpr int NBVEC_258 = 258; +constexpr int NBVEC_512 = 512; +constexpr int NBVEC_1024 = 1024; +constexpr int NBVEC_2048 = 2048; + + +template +struct pool_allocator; + +#define POOL(BYTES) \ +constexpr int POOL_BLOCK_##BYTES = BYTES; \ +extern MemoryPool vecPool_##BYTES;\ +template<> \ +struct pool_allocator { \ + static char* allocate () noexcept{ \ + return(vecPool_##BYTES.get_new_buffer()); \ + } \ + \ + static void destroy ( char* ptr ) noexcept { \ + vecPool_##BYTES.recycle_buffer(ptr); \ + } \ + \ +}; + + +#if defined(POOL_ALLOCATOR) +#include "allocation/all.h" +#endif + +template<> +struct pool_allocator { + /* Dynamic size allocations */ + static char* allocate ( std::size_t sz) noexcept{ + return(reinterpret_cast(std::malloc(sz))); + } + + static void destroy ( char* ptr ) noexcept { + std::free(ptr); + } + +}; + +extern std::map current_stats; +extern std::map max_stats; +extern std::map current_dyn_stats; + + +template +struct stat_allocator { + + /* Dynamic allocations */ + static char* allocate ( std::size_t sz) noexcept{ + current_stats[-sz]++; + if (current_stats[-sz]>max_stats[-sz]) + { + max_stats[-sz] = current_stats[-sz]; + } + void *ptr = std::malloc(sz); + current_dyn_stats[ptr]=sz; + return(reinterpret_cast(ptr)); + } + + /* Size known at build time */ + static char* allocate () noexcept{ + current_stats[L]++; + if (current_stats[L]>max_stats[L]) + { + max_stats[L] = current_stats[L]; + } + return(reinterpret_cast(std::malloc(L))); + } + + static void destroy ( char* ptr ) noexcept { + if (L<0) + { + std::size_t sz = current_dyn_stats[ptr]; + current_stats[-sz]--; + } + else + { + current_stats[L]--; + } + std::free(ptr); + } + +}; + +extern void print_map(std::string comment); +extern void check_current_stats(); +extern void reset_current_stats(); diff --git a/dsppp/cdefault.yml b/dsppp/cdefault.yml new file mode 100644 index 00000000..0ede69af --- /dev/null +++ b/dsppp/cdefault.yml @@ 
-0,0 +1,142 @@ +default: + + compiler: AC6 + + misc: + - for-compiler: AC6 + C: + - -Wsign-compare + - -Wdouble-promotion + - -DNDEBUG + - -Wall + - -Wextra + - -Werror + - -std=c11 + - -Ofast + - -ffast-math + - -Wno-packed + - -Wno-missing-variable-declarations + - -Wno-missing-prototypes + - -Wno-missing-noreturn + - -Wno-sign-conversion + - -Wno-nonportable-include-path + - -Wno-reserved-id-macro + - -Wno-unused-macros + - -Wno-documentation-unknown-command + - -Wno-documentation + - -Wno-license-management + - -Wno-parentheses-equality + - -Wno-reserved-identifier + - -ffunction-sections + - -Wno-nan-infinity-disabled + - -DARM_MATH_LOOPUNROLL + CPP: + - -fno-rtti + - -fno-exceptions + - -DNDEBUG + - -Wall + - -Wextra + - -std=c++17 + - -Ofast + - -ffast-math + - -Wno-unused-function + - -ffunction-sections + - -mllvm -disable-vector-combine + ASM: + - -masm=auto + Link: + - --entry=Reset_Handler + - --info=summarysizes + - --info=sizes + - --info=totals + - --info=unused + - --info=veneers + + - for-compiler: GCC + C: + - -Wsign-compare + - -Wdouble-promotion + - -DNDEBUG + - -Wall + - -Wextra + - -Werror + - -std=c11 + - -Ofast + - -ffast-math + - -Wno-packed + - -Wno-missing-prototypes + - -Wno-missing-noreturn + - -Wno-sign-conversion + - -Wno-unused-macros + - -ffunction-sections + - -DARM_MATH_LOOPUNROLL + - -flax-vector-conversions + - -Wno-maybe-uninitialized + - -fdata-sections + - -fno-unroll-loops + CPP: + - -fno-rtti + - -fno-exceptions + - -DNDEBUG + - -Wall + - -Wextra + - -std=c++17 + - -Ofast + - -ffast-math + - -Wno-unused-function + - -ffunction-sections + - -fdata-sections + - -Wno-psabi + - -fno-unroll-loops + ASM: + - -masm=auto + Link: + - --specs=nano.specs + - -Wl,-Map=$elf()$.map + - -lm + - -Wl,--wrap=SysTick_Handler + - -Wl,--gc-sections + Library: + - -lm + + - for-compiler: CLANG + C: + - -Wsign-compare + - -Wdouble-promotion + - -DNDEBUG + - -Wall + - -Wextra + - -Werror + - -std=c11 + - -Ofast + - -ffast-math + - -Wno-packed + - 
-Wno-missing-variable-declarations + - -Wno-missing-prototypes + - -Wno-missing-noreturn + - -Wno-sign-conversion + - -Wno-nonportable-include-path + - -Wno-reserved-id-macro + - -Wno-unused-macros + - -Wno-documentation-unknown-command + - -Wno-documentation + - -Wno-parentheses-equality + - -Wno-reserved-identifier + - -ffunction-sections + - -DARM_MATH_LOOPUNROLL + CPP: + - -fno-rtti + - -fno-exceptions + - -DNDEBUG + - -Wall + - -Wextra + - -std=c++17 + - -Ofast + - -ffast-math + - -Wno-unused-function + - -ffunction-sections + ASM: + - -masm=auto + Link: + - -Wl,-Map=$elf()$.map + - -Wl,--gc-sections diff --git a/dsppp/clang_sse300.c b/dsppp/clang_sse300.c new file mode 100644 index 00000000..c6470905 --- /dev/null +++ b/dsppp/clang_sse300.c @@ -0,0 +1,65 @@ +#include "RTE_Components.h" +#include + +#include "Driver_USART.h" +#include "stdout_USART.h" + + + + +static int stdin_getc(FILE *file) { + (void)file; + return(0); +} + + +// iostream has references to stdin and stderr and there is a link +// error if not defined. 
+static FILE __stdin = FDEV_SETUP_STREAM(NULL, + stdin_getc, + NULL, + _FDEV_SETUP_READ); +FILE *const stdin = &__stdin; + +static int stderr_putc(char c, FILE *file) { + (void)file; + return(0); +} + +static FILE __stderr = FDEV_SETUP_STREAM(stderr_putc, + NULL, + NULL, + _FDEV_SETUP_WRITE); +FILE *const stderr = &__stderr; + +//-------- <<< Use Configuration Wizard in Context Menu >>> -------------------- + +// STDOUT USART Interface + +// Connect to hardware via Driver_USART# <0-255> +// Select driver control block for USART interface +#define USART_DRV_NUM 0 + +// Baudrate +#define USART_BAUDRATE 115200 + +// + + +#define _USART_Driver_(n) Driver_USART##n +#define USART_Driver_(n) _USART_Driver_(n) + +extern ARM_DRIVER_USART USART_Driver_(USART_DRV_NUM); +#define ptrUSART (&USART_Driver_(USART_DRV_NUM)) + +int stdout_putchar(const unsigned char ch) { + uint8_t buf[1]; + + buf[0] = ch; + if (ptrUSART->Send(buf, 1) != ARM_DRIVER_OK) { + return (-1); + } + while (ptrUSART->GetTxCount() != 1); + return (ch); +} + diff --git a/dsppp/example.cproject.yml b/dsppp/example.cproject.yml new file mode 100644 index 00000000..0e41fef7 --- /dev/null +++ b/dsppp/example.cproject.yml @@ -0,0 +1,120 @@ +project: + groups: + - group: Examples + files: + #- file: Examples/dot_product.cpp + #- file: Examples/vector_op.cpp + - file: Examples/matrix_op.cpp + - file: clang_sse300.c + for-context: + - +MPS3-Corstone-300 + for-compiler: + - CLANG + add-path: + - Include + - Examples + + components: + - component: ARM::CMSIS:CORE + - component: ARM::CMSIS:DSP@1.15.0 + - component: ARM::Device:Startup&C Startup + for-context: + - +VHT-Corstone-300 + - +VHT-M0P + - +VHT-M4 + - +MPS3-Corstone-300 + - component: ARM::Device:Definition + for-context: + - +VHT-Corstone-300 + - +MPS3-Corstone-300 + - component: CMSIS-Compiler:CORE + for-context: + - +MPS3-Corstone-300 + - component: CMSIS-Compiler:STDOUT:Custom@1.0.0 + for-context: + - +MPS3-Corstone-300 + - component: ARM::Device:USART STDOUT 
+ for-context: + - +MPS3-Corstone-300 + - component: ARM::CMSIS Driver:USART + for-context: + - +MPS3-Corstone-300 + - component: ARM::Device:Native Driver:SysCounter + for-context: + - +VHT-Corstone-300 + - +MPS3-Corstone-300 + - component: ARM::Device:Native Driver:SysTimer + for-context: + - +VHT-Corstone-300 + - +MPS3-Corstone-300 + - component: ARM::Device:Native Driver:Timeout + for-context: + - +VHT-Corstone-300 + - +MPS3-Corstone-300 + - component: ARM::Device:Native Driver:UART + for-context: + - +MPS3-Corstone-300 + + linker: + - script: linker_scripts/gcc_sse300_mps3.ld + for-context: + - +MPS3-Corstone-300 + - +VHT-Corstone-300 + for-compiler: GCC + + - script: linker_scripts/clang_sse300_mps3.sct + for-context: + - +MPS3-Corstone-300 + - +VHT-Corstone-300 + for-compiler: CLANG + + - script: linker_scripts/ac6_sse300_mps3_s.sct + for-context: + - +MPS3-Corstone-300 + - +VHT-Corstone-300 + for-compiler: AC6 + + - regions: linker_scripts/SSE-300-MPS3/region_defs.h + for-context: + - +MPS3-Corstone-300 + - +VHT-Corstone-300 + + - script: linker_scripts/gcc_m0p_mps3.ld + for-context: + - +VHT-M0P + for-compiler: GCC + + - script: linker_scripts/clang_m0p_mps3.ld + for-context: + - +VHT-M0P + for-compiler: CLANG + + - script: linker_scripts/ac6_m0p_mps3_s.sct + for-context: + - +VHT-M0P + for-compiler: AC6 + + - regions: linker_scripts/ARMCM0P/region_defs.h + for-context: + - +VHT-M0P + + - script: linker_scripts/gcc_m4_mps3.ld + for-context: + - +VHT-M4 + for-compiler: GCC + + - script: linker_scripts/clang_m4_mps3.ld + for-context: + - +VHT-M4 + for-compiler: CLANG + + - script: linker_scripts/ac6_m4_mps3_s.sct + for-context: + - +VHT-M4 + for-compiler: AC6 + + - regions: linker_scripts/ARMCM4/region_defs.h + for-context: + - +VHT-M4 + + diff --git a/dsppp/fvp_configs/VHT-Corstone-300.txt b/dsppp/fvp_configs/VHT-Corstone-300.txt new file mode 100644 index 00000000..e352bec1 --- /dev/null +++ b/dsppp/fvp_configs/VHT-Corstone-300.txt @@ -0,0 +1,9 @@ 
+core_clk.mul=100000000 +cpu0.semihosting-enable=1 +cpu0.semihosting-heap_base=0x0 +cpu0.semihosting-heap_limit=0x0 +cpu0.semihosting-stack_base=0x0 +cpu0.semihosting-stack_limit=0x0 +cpu0.FPU=1 +cpu0.MVE=2 +mps3_board.visualisation.disable-visualisation=1 diff --git a/dsppp/fvp_configs/VHT-M0P.txt b/dsppp/fvp_configs/VHT-M0P.txt new file mode 100644 index 00000000..4892c1e4 --- /dev/null +++ b/dsppp/fvp_configs/VHT-M0P.txt @@ -0,0 +1,3 @@ +fvp_mps2.mps2_visualisation.disable-visualisation=1 +armcortexm0plusct.semihosting-enable=1 +armcortexm0plusct.NUM_MPU_REGION=0x8 diff --git a/dsppp/fvp_configs/VHT-M4.txt b/dsppp/fvp_configs/VHT-M4.txt new file mode 100644 index 00000000..fda8c024 --- /dev/null +++ b/dsppp/fvp_configs/VHT-M4.txt @@ -0,0 +1,3 @@ +fvp_mps2.mps2_visualisation.disable-visualisation=1 +armcortexm4ct.semihosting-enable=1 +armcortexm4ct.vfp-present=1 diff --git a/dsppp/getserial.py b/dsppp/getserial.py new file mode 100644 index 00000000..d3f7e678 --- /dev/null +++ b/dsppp/getserial.py @@ -0,0 +1,28 @@ +import serial +import re +import io +from pyocd.core.target import Target + +lines = [] + +def read_stdout(target): + print("Waiting for serial") + lines = [] + + with serial.Serial('COM6', 115200, timeout=1,parity=serial.PARITY_NONE) as ser: + sio = io.TextIOWrapper(ser) + DONE = False + target.reset() + while not DONE: + line = sio.readline() + if len(line)==0: + raise Exception('Timeout error') + if re.match(r'Stats',line): + DONE=True + else: + #print(line) + lines.append(line) + + + return(lines) + \ No newline at end of file diff --git a/dsppp/linker_scripts/ARMCM0P/region_defs.h b/dsppp/linker_scripts/ARMCM0P/region_defs.h new file mode 100644 index 00000000..b66150bb --- /dev/null +++ b/dsppp/linker_scripts/ARMCM0P/region_defs.h @@ -0,0 +1,60 @@ +#ifndef REGIONS_ARMCM0P_H +#define REGIONS_ARMCM0P_H + + +//-------- <<< Use Configuration Wizard in Context Menu >>> -------------------- + +// Device pack: ARM::Cortex_DFP@1.0.0 +// Device pack used 
to generate this file + +// ROM Configuration +// ======================= +// ROM=<__ROM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x00000000 +#define __ROM0_BASE 0x00000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00040000 +#define __ROM0_SIZE 0x00040000 +// Default region +// Enables memory region globally for the application. +#define __ROM0_DEFAULT 1 +// Startup +// Selects region to be used for startup code. +#define __ROM0_STARTUP 1 +// + +// + +// RAM Configuration +// ======================= +// RAM=<__RAM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x20000000 +#define __RAM0_BASE 0x20000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM0_SIZE 0x00040000 +// Default region +// Enables memory region globally for the application. +#define __RAM0_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM0_NOINIT 0 +// + +// + +// Stack / Heap Configuration +// Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +// Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +#define __STACK_SIZE 0x00002000 +#define __HEAP_SIZE 0x00038000 +// + + +#endif /* REGIONS_ARMCM0P_H */ diff --git a/dsppp/linker_scripts/ARMCM4/region_defs.h b/dsppp/linker_scripts/ARMCM4/region_defs.h new file mode 100644 index 00000000..dc63f5bb --- /dev/null +++ b/dsppp/linker_scripts/ARMCM4/region_defs.h @@ -0,0 +1,60 @@ +#ifndef REGIONS_ARMCM4_H +#define REGIONS_ARMCM4_H + + +//-------- <<< Use Configuration Wizard in Context Menu >>> -------------------- + +// Device pack: ARM::Cortex_DFP@1.0.0 +// Device pack used to generate this file + +// ROM Configuration +// ======================= +// ROM=<__ROM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. 
+// Default: 0x00000000 +#define __ROM0_BASE 0x00000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00040000 +#define __ROM0_SIZE 0x00040000 +// Default region +// Enables memory region globally for the application. +#define __ROM0_DEFAULT 1 +// Startup +// Selects region to be used for startup code. +#define __ROM0_STARTUP 1 +// + +// + +// RAM Configuration +// ======================= +// RAM=<__RAM0> +// Base address <0x0-0xFFFFFFFF:8> +// Defines base address of memory region. +// Default: 0x20000000 +#define __RAM0_BASE 0x20000000 +// Region size [bytes] <0x0-0xFFFFFFFF:8> +// Defines size of memory region. +// Default: 0x00020000 +#define __RAM0_SIZE 0x00040000 +// Default region +// Enables memory region globally for the application. +#define __RAM0_DEFAULT 1 +// No zero initialize +// Excludes region from zero initialization. +#define __RAM0_NOINIT 0 +// + +// + +// Stack / Heap Configuration +// Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> +// Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> +#define __STACK_SIZE 0x00002000 +#define __HEAP_SIZE 0x00038000 +// + + +#endif /* REGIONS_ARMCM4_H */ diff --git a/dsppp/linker_scripts/SSE-300-MPS3/region_defs.h b/dsppp/linker_scripts/SSE-300-MPS3/region_defs.h new file mode 100644 index 00000000..32ac16b3 --- /dev/null +++ b/dsppp/linker_scripts/SSE-300-MPS3/region_defs.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016-2022 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __REGION_DEFS_H__ +#define __REGION_DEFS_H__ + +#include "region_limits.h" + +/* ************************************************************** + * WARNING: this file is parsed both by the C/C++ compiler + * and the linker. As a result the syntax must be valid not only + * for C/C++ but for the linker scripts too. + * Beware of the following limitations: + * - LD (GCC linker) requires white space around operators. + * - UL postfix for macros is not suported by the linker script + ****************************************************************/ + +/* Secure regions */ +#define S_CODE_START ( S_ROM_ALIAS ) +#define S_CODE_SIZE ( TOTAL_S_ROM_SIZE ) +#define S_CODE_LIMIT ( S_CODE_START + S_CODE_SIZE ) + +#define S_DATA_START ( S_RAM_ALIAS ) +#define S_DATA_SIZE ( TOTAL_S_RAM_SIZE ) +#define S_DATA_LIMIT ( S_DATA_START + S_DATA_SIZE ) + +#define S_DDR4_START ( S_DDR4_ALIAS ) +#define S_DDR4_SIZE ( TOTAL_S_DDR4_SIZE ) +#define S_DDR4_LIMIT ( S_DDR4_START + S_DDR4_SIZE ) + +#endif /* __REGION_DEFS_H__ */ diff --git a/dsppp/linker_scripts/SSE-300-MPS3/region_limits.h b/dsppp/linker_scripts/SSE-300-MPS3/region_limits.h new file mode 100644 index 00000000..0d600a36 --- /dev/null +++ b/dsppp/linker_scripts/SSE-300-MPS3/region_limits.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018-2022 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __REGION_LIMITS_H__ +#define __REGION_LIMITS_H__ + +/* ************************************************************** + * WARNING: this file is parsed both by the C/C++ compiler + * and the linker. As a result the syntax must be valid not only + * for C/C++ but for the linker scripts too. + * Beware of the following limitations: + * - LD (GCC linker) requires white space around operators. + * - UL postfix for macros is not suported by the linker script + ****************************************************************/ + +/* Secure Code */ +#define S_ROM_ALIAS (0x10000000) /* ITCM_BASE_S */ +#define TOTAL_S_ROM_SIZE (0x00080000) /* 512 kB */ + +/* Secure Data */ +#define S_RAM_ALIAS (0x30000000) /* DTCM_BASE_S */ +#define TOTAL_S_RAM_SIZE (0x00080000) /* 512 kB */ + +/* Secure DDR4 */ +#define S_DDR4_ALIAS (0x70000000) /* DDR4_BLK1_BASE_S */ +#define TOTAL_S_DDR4_SIZE (0x10000000) /* 256 MB */ + +/* Heap and Stack sizes for secure and nonsecure applications */ +#define HEAP_SIZE (0x00038000) /* 1 KiB */ +#define STACK_SIZE (0x00002000) /* 1 KiB */ + +#endif /* __REGION_LIMITS_H__ */ diff --git a/dsppp/linker_scripts/ac6_m0p_mps3_s.sct b/dsppp/linker_scripts/ac6_m0p_mps3_s.sct new file mode 100644 index 00000000..4d6e579d --- /dev/null +++ b/dsppp/linker_scripts/ac6_m0p_mps3_s.sct @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/*---------------------------------------------------------------------------- + Scatter File Definitions definition + *----------------------------------------------------------------------------*/ + +LR_ROM0 __ROM0_BASE __ROM0_SIZE { + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + ER_CMSE_VENEER __ROM0_BASE+__ROM0_SIZE -__ROM0_SIZE { + *(Veneer$$CMSE) + } + #define ER_CMSE_VENEER_SIZE AlignExpr(ImageLength(ER_CMSE_VENEER), 8) +#else + #define ER_CMSE_VENEER_SIZE 0 +#endif + + ER_ROM0 __ROM0_BASE (__ROM0_SIZE - ER_CMSE_VENEER_SIZE) { + *.o (RESET, +First) + *(InRoot$$Sections) + *(+RO +XO) + } + + RW_NOINIT __RAM0_BASE UNINIT (__RAM0_SIZE - __HEAP_SIZE - __STACK_SIZE) { + *(.bss.noinit) + } + + RW_RAM0 AlignExpr(+0, 8) (__RAM0_SIZE - __HEAP_SIZE - __STACK_SIZE - AlignExpr(ImageLength(RW_NOINIT), 8)) { + *(+RW +ZI) + } + +#if __HEAP_SIZE > 0 + ARM_LIB_HEAP (AlignExpr(+0, 8)) EMPTY __HEAP_SIZE { ; Reserve empty region for heap + } +#endif + + ARM_LIB_STACK (__RAM0_BASE + __RAM0_SIZE - __STACKSEAL_SIZE) EMPTY -__STACK_SIZE { ; Reserve empty region for stack + } + +#if __STACKSEAL_SIZE > 0 + STACKSEAL +0 EMPTY 8 { ; Reserve empty region for stack seal immediately after stack + } +#endif + +#if __RAM1_SIZE > 0 + RW_RAM1 __RAM1_BASE __RAM1_SIZE { + .ANY (+RW +ZI) + } +#endif + +#if __RAM2_SIZE > 0 + RW_RAM2 __RAM2_BASE __RAM2_SIZE { + .ANY (+RW +ZI) + } +#endif + +#if __RAM3_SIZE > 0 + RW_RAM3 __RAM3_BASE __RAM3_SIZE { + .ANY (+RW +ZI) + } +#endif +} + +#if __ROM1_SIZE > 0 +LR_ROM1 __ROM1_BASE __ROM1_SIZE { + ER_ROM1 +0 __ROM1_SIZE { + .ANY (+RO +XO) + } +} +#endif + +#if __ROM2_SIZE > 0 +LR_ROM2 __ROM2_BASE 
__ROM2_SIZE { + ER_ROM2 +0 __ROM2_SIZE { + .ANY (+RO +XO) + } +} +#endif + +#if __ROM3_SIZE > 0 +LR_ROM3 __ROM3_BASE __ROM3_SIZE { + ER_ROM3 +0 __ROM3_SIZE { + .ANY (+RO +XO) + } +} +#endif diff --git a/dsppp/linker_scripts/ac6_m4_mps3_s.sct b/dsppp/linker_scripts/ac6_m4_mps3_s.sct new file mode 100644 index 00000000..4d6e579d --- /dev/null +++ b/dsppp/linker_scripts/ac6_m4_mps3_s.sct @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/*---------------------------------------------------------------------------- + Scatter File Definitions definition + *----------------------------------------------------------------------------*/ + +LR_ROM0 __ROM0_BASE __ROM0_SIZE { + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + ER_CMSE_VENEER __ROM0_BASE+__ROM0_SIZE -__ROM0_SIZE { + *(Veneer$$CMSE) + } + #define ER_CMSE_VENEER_SIZE AlignExpr(ImageLength(ER_CMSE_VENEER), 8) +#else + #define ER_CMSE_VENEER_SIZE 0 +#endif + + ER_ROM0 __ROM0_BASE (__ROM0_SIZE - ER_CMSE_VENEER_SIZE) { + *.o (RESET, +First) + *(InRoot$$Sections) + *(+RO +XO) + } + + RW_NOINIT __RAM0_BASE UNINIT (__RAM0_SIZE - __HEAP_SIZE - __STACK_SIZE) { + *(.bss.noinit) + } + + RW_RAM0 AlignExpr(+0, 8) (__RAM0_SIZE - __HEAP_SIZE - __STACK_SIZE - AlignExpr(ImageLength(RW_NOINIT), 8)) { + *(+RW +ZI) + } + +#if __HEAP_SIZE > 0 + ARM_LIB_HEAP (AlignExpr(+0, 8)) EMPTY __HEAP_SIZE { ; Reserve empty region for heap + } +#endif + + ARM_LIB_STACK (__RAM0_BASE + __RAM0_SIZE - __STACKSEAL_SIZE) EMPTY -__STACK_SIZE { ; Reserve empty region for stack + } + +#if __STACKSEAL_SIZE > 0 + STACKSEAL +0 EMPTY 8 { ; Reserve empty region for stack seal immediately after stack + } +#endif + +#if __RAM1_SIZE > 0 + RW_RAM1 __RAM1_BASE __RAM1_SIZE { + .ANY (+RW +ZI) + } +#endif + +#if __RAM2_SIZE > 0 + RW_RAM2 __RAM2_BASE __RAM2_SIZE { + .ANY (+RW +ZI) + } +#endif + +#if __RAM3_SIZE > 0 + RW_RAM3 __RAM3_BASE __RAM3_SIZE { + .ANY (+RW +ZI) + } +#endif +} + +#if __ROM1_SIZE > 0 +LR_ROM1 __ROM1_BASE __ROM1_SIZE { + ER_ROM1 +0 __ROM1_SIZE { + .ANY (+RO +XO) + } +} +#endif + +#if __ROM2_SIZE > 0 +LR_ROM2 __ROM2_BASE 
__ROM2_SIZE { + ER_ROM2 +0 __ROM2_SIZE { + .ANY (+RO +XO) + } +} +#endif + +#if __ROM3_SIZE > 0 +LR_ROM3 __ROM3_BASE __ROM3_SIZE { + ER_ROM3 +0 __ROM3_SIZE { + .ANY (+RO +XO) + } +} +#endif diff --git a/dsppp/linker_scripts/ac6_sse300_mps3_s.sct b/dsppp/linker_scripts/ac6_sse300_mps3_s.sct new file mode 100644 index 00000000..6712e5cc --- /dev/null +++ b/dsppp/linker_scripts/ac6_sse300_mps3_s.sct @@ -0,0 +1,79 @@ + +;/* +; * Copyright (c) 2018-2021 Arm Limited. All rights reserved. +; * +; * Licensed under the Apache License, Version 2.0 (the "License"); +; * you may not use this file except in compliance with the License. +; * You may obtain a copy of the License at +; * +; * http://www.apache.org/licenses/LICENSE-2.0 +; * +; * Unless required by applicable law or agreed to in writing, software +; * distributed under the License is distributed on an "AS IS" BASIS, +; * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; * See the License for the specific language governing permissions and +; * limitations under the License. +; * +; */ + +;#include "region_defs.h" + +LR_CODE S_CODE_START { + ER_CODE S_CODE_START { + *.o (RESET +First) + .ANY (+RO) + /* different test vectors */ + * (InRoot$$Sections) + } + + /* + * Place the CMSE Veneers (containing the SG instruction) after the code, in + * a separate 32 bytes aligned region so that the SAU can programmed to just + * set this region as Non-Secure Callable. The maximum size of this + * executable region makes it only used the space left over by the ER_CODE + * region so that you can rely on code+veneer size combined will not exceed + * the S_CODE_SIZE value. We also substract from the available space the + * area used to align this section on 32 bytes boundary (for SAU conf). 
+ */ + ER_CODE_CMSE_VENEER +0 ALIGN 32 { + *(Veneer$$CMSE) + } + /* + * This dummy region ensures that the next one will be aligned on a 32 bytes + * boundary, so that the following region will not be mistakenly configured + * as Non-Secure Callable by the SAU. + */ + ER_CODE_CMSE_VENEER_DUMMY +0 ALIGN 32 EMPTY 0 {} + + /* This empty, zero long execution region is here to mark the limit address + * of the last execution region that is allocated in SRAM. + */ + CODE_WATERMARK +0 EMPTY 0x0 { + } + /* Make sure that the sections allocated in the SRAM does not exceed the + * size of the SRAM available. + */ + ScatterAssert(ImageLimit(CODE_WATERMARK) <= S_CODE_START + S_CODE_SIZE) + + ER_DATA S_DATA_START { + .ANY (+ZI +RW +RO-DATA) + } + + #if HEAP_SIZE > 0 + ARM_LIB_HEAP +0 ALIGN 8 EMPTY HEAP_SIZE { ; Reserve empty region for heap + } + #endif + + ARM_LIB_STACK +0 ALIGN 32 EMPTY STACK_SIZE { ; Reserve empty region for stack + } + + /* This empty, zero long execution region is here to mark the limit address + * of the last execution region that is allocated in SRAM. + */ + SRAM_WATERMARK +0 EMPTY 0x0 { + } + /* Make sure that the sections allocated in the SRAM does not exceed the + * size of the SRAM available. + */ + ScatterAssert(ImageLimit(SRAM_WATERMARK) <= S_DATA_START + S_DATA_SIZE) +} diff --git a/dsppp/linker_scripts/ac6_sse310_mps3_s.sct b/dsppp/linker_scripts/ac6_sse310_mps3_s.sct new file mode 100644 index 00000000..0650639f --- /dev/null +++ b/dsppp/linker_scripts/ac6_sse310_mps3_s.sct @@ -0,0 +1,60 @@ + +;/* +; * Copyright (c) 2018-2021 Arm Limited +; * +; * Licensed under the Apache License, Version 2.0 (the "License"); +; * you may not use this file except in compliance with the License. 
+; * You may obtain a copy of the License at +; * +; * http://www.apache.org/licenses/LICENSE-2.0 +; * +; * Unless required by applicable law or agreed to in writing, software +; * distributed under the License is distributed on an "AS IS" BASIS, +; * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; * See the License for the specific language governing permissions and +; * limitations under the License. +; * +; */ + + + +LR_CODE S_CODE_START { + ER_CODE S_CODE_START { + *.o (RESET +First) + .ANY (+RO) + /* different test vectors */ + * (InRoot$$Sections) + } + + /* This empty, zero long execution region is here to mark the limit address + * of the last execution region that is allocated in SRAM. + */ + CODE_WATERMARK +0 EMPTY 0x0 { + } + /* Make sure that the sections allocated in the SRAM does not exceed the + * size of the SRAM available. + */ + ScatterAssert(ImageLimit(CODE_WATERMARK) <= S_CODE_START + S_CODE_SIZE) + + ER_DATA S_DATA_START { + .ANY (+ZI +RW +RO-DATA) + } + + #if HEAP_SIZE > 0 + ARM_LIB_HEAP +0 ALIGN 8 EMPTY HEAP_SIZE { ; Reserve empty region for heap + } + #endif + + ARM_LIB_STACK +0 ALIGN 32 EMPTY STACK_SIZE { ; Reserve empty region for stack + } + + /* This empty, zero long execution region is here to mark the limit address + * of the last execution region that is allocated in SRAM. + */ + SRAM_WATERMARK +0 EMPTY 0x0 { + } + /* Make sure that the sections allocated in the SRAM does not exceed the + * size of the SRAM available. 
+ */ + ScatterAssert(ImageLimit(SRAM_WATERMARK) <= S_DATA_START + S_DATA_SIZE) +} diff --git a/dsppp/linker_scripts/clang_m0p_mps3.ld b/dsppp/linker_scripts/clang_m0p_mps3.ld new file mode 100644 index 00000000..40f955c1 --- /dev/null +++ b/dsppp/linker_scripts/clang_m0p_mps3.ld @@ -0,0 +1,353 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright © 2019 Keith Packard + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx!w) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx!w) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx!w) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx!w) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (w!rx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (w!rx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (w!rx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (w!rx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +ENTRY(Reset_Handler) + +PHDRS +{ + text PT_LOAD; + ram PT_LOAD; + ram_init PT_LOAD; + tls PT_TLS; +} + +SECTIONS +{ + .init : { + KEEP (*(.vectors)) + KEEP (*(.text.init.enter)) + KEEP (*(.data.init.enter)) + KEEP (*(SORT_BY_NAME(.init) SORT_BY_NAME(.init.*))) + } >ROM0 AT>ROM0 :text + + .text : { + + /* code */ + *(.text.unlikely .text.unlikely.*) + *(.text.startup .text.startup.*) + *(.text .text.* .opd .opd.*) + *(.gnu.linkonce.t.*) + KEEP (*(.fini .fini.*)) + __text_end = .; + + PROVIDE (__etext = __text_end); + PROVIDE (_etext = __text_end); + PROVIDE (etext = __text_end); + + /* read-only data */ + *(.rdata) + *(.rodata .rodata.*) + *(.gnu.linkonce.r.*) + + *(.srodata.cst16) + *(.srodata.cst8) + *(.srodata.cst4) + *(.srodata.cst2) + *(.srodata .srodata.*) + *(.data.rel.ro .data.rel.ro.*) + 
*(.got .got.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + /* lists of constructors and destructors */ + PROVIDE_HIDDEN ( __preinit_array_start = . ); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN ( __preinit_array_end = . ); + + PROVIDE_HIDDEN ( __init_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) + KEEP (*(.init_array .ctors)) + PROVIDE_HIDDEN ( __init_array_end = . ); + + PROVIDE_HIDDEN ( __fini_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) + KEEP (*(.fini_array .dtors)) + PROVIDE_HIDDEN ( __fini_array_end = . ); + + } >ROM0 AT>ROM0 :text + + .toc : { + *(.toc .toc.*) + } >ROM0 AT>ROM0 :text + + /* additional sections when compiling with C++ exception support */ + + .except_ordered : { + *(.gcc_except_table *.gcc_except_table.*) + KEEP (*(.eh_frame .eh_frame.*)) + *(.ARM.extab* .gnu.linkonce.armextab.*) + } >ROM0 AT>ROM0 :text + + .except_unordered : { + . = ALIGN(8); + + PROVIDE(__exidx_start = .); + *(.ARM.exidx*) + PROVIDE(__exidx_end = .); + } >ROM0 AT>ROM0 :text + + + /* + * Data values which are preserved across reset + */ + .preserve (NOLOAD) : { + PROVIDE(__preserve_start__ = .); + KEEP(*(SORT_BY_NAME(.preserve.*))) + KEEP(*(.preserve)) + PROVIDE(__preserve_end__ = .); + } >RAM0 AT>RAM0 :ram + + .data : { + *(.data .data.*) + *(.gnu.linkonce.d.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + PROVIDE( __global_pointer$ = . + 0x800 ); + *(.sdata .sdata.* .sdata2.*) + *(.gnu.linkonce.s.*) + } >RAM0 AT>ROM0 :ram_init + PROVIDE(__data_start = ADDR(.data)); + PROVIDE(__data_source = LOADADDR(.data)); + + /* Thread local initialized data. This gets + * space allocated as it is expected to be placed + * in ram to be used as a template for TLS data blocks + * allocated at runtime. 
We're slightly abusing that + * by placing the data in flash where it will be copied + * into the allocate ram addresses by the existing + * data initialization code in crt0 + */ + .tdata : { + *(.tdata .tdata.* .gnu.linkonce.td.*) + PROVIDE(__data_end = .); + PROVIDE(__tdata_end = .); + } >RAM0 AT>ROM0 :tls :ram_init + PROVIDE( __tls_base = ADDR(.tdata)); + PROVIDE( __tdata_start = ADDR(.tdata)); + PROVIDE( __tdata_source = LOADADDR(.tdata) ); + PROVIDE( __tdata_source_end = LOADADDR(.tdata) + SIZEOF(.tdata) ); + PROVIDE( __data_source_end = __tdata_source_end ); + PROVIDE( __tdata_size = SIZEOF(.tdata) ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata),ALIGNOF(.tbss)) ); + + PROVIDE( __edata = __data_end ); + PROVIDE( _edata = __data_end ); + PROVIDE( edata = __data_end ); + PROVIDE( __data_size = __data_end - __data_start ); + PROVIDE( __data_source_size = __data_source_end - __data_source ); + + .tbss (NOLOAD) : { + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + PROVIDE( __tls_end = . ); + PROVIDE( __tbss_end = . ); + } >RAM0 AT>RAM0 :tls :ram + PROVIDE( __bss_start = ADDR(.tbss)); + PROVIDE( __tbss_start = ADDR(.tbss)); + PROVIDE( __tbss_offset = ADDR(.tbss) - ADDR(.tdata) ); + PROVIDE( __tbss_size = SIZEOF(.tbss) ); + PROVIDE( __tls_size = __tls_end - __tls_base ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + PROVIDE( __arm64_tls_tcb_offset = MAX(16, __tls_align) ); + + /* + * The linker special cases .tbss segments which are + * identified as segments which are not loaded and are + * thread_local. + * + * For these segments, the linker does not advance 'dot' + * across them. We actually need memory allocated for tbss, + * so we create a special segment here just to make room + */ + /* + .tbss_space (NOLOAD) : { + . = ADDR(.tbss); + . = . 
+ SIZEOF(.tbss); + } >RAM0 AT>RAM0 :ram + */ + + .bss (NOLOAD) : { + *(.sbss*) + *(.gnu.linkonce.sb.*) + *(.bss .bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + + /* Align the heap */ + . = ALIGN(8); + __bss_end = .; + } >RAM0 AT>RAM0 :ram + PROVIDE( __non_tls_bss_start = ADDR(.bss) ); + PROVIDE( __end = __bss_end ); + PROVIDE( _end = __bss_end ); + PROVIDE( end = __bss_end ); + PROVIDE( __bss_size = __bss_end - __bss_start ); + + /* Make the rest of memory available for heap storage */ + PROVIDE (__heap_start = __end); +#ifdef __HEAP_SIZE + PROVIDE (__heap_end = __heap_start + __HEAP_SIZE); + PROVIDE (__heap_size = __HEAP_SIZE); +#else + PROVIDE (__heap_end = __stack - __STACK_SIZE); + PROVIDE (__heap_size = __heap_end - __heap_start); +#endif + .heap (NOLOAD) : { + . += __heap_size; + } >RAM0 :ram + + /* Define a stack region to make sure it fits in memory */ + PROVIDE(__stack = ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE); + PROVIDE(__stack_limit = __stack - __STACK_SIZE); + .stack (__stack_limit) (NOLOAD) : { + . += __STACK_SIZE; + } >RAM0 :ram + +#if __STACKSEAL_SIZE > 0 + PROVIDE(__stack_seal = __stack); + .stackseal (__stack) (NOLOAD) : + { + . += __STACKSEAL_SIZE; + } >RAM0 :ram +#endif + + /* Throw away C++ exception handling information */ + + /* + + /DISCARD/ : { + *(.note .note.*) + *(.eh_frame .eh_frame.*) + *(.ARM.extab* .gnu.linkonce.armextab.*) + *(.ARM.exidx*) + } + + */ + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1. 
*/ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions. */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2. */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2. */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions. */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3. */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + /* DWARF 5. */ + .debug_addr 0 : { *(.debug_addr) } + .debug_line_str 0 : { *(.debug_line_str) } + .debug_loclists 0 : { *(.debug_loclists) } + .debug_macro 0 : { *(.debug_macro) } + .debug_names 0 : { *(.debug_names) } + .debug_rnglists 0 : { *(.debug_rnglists) } + .debug_str_offsets 0 : { *(.debug_str_offsets) } + .debug_sup 0 : { *(.debug_sup) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } +} +/* + * Check that sections that are copied from flash to RAM have matching + * padding, so that a single memcpy() of __data_size copies the correct bytes. 
+ */ +ASSERT( __data_size == __data_source_size, + "ERROR: .data/.tdata flash size does not match RAM size"); diff --git a/dsppp/linker_scripts/clang_m4_mps3.ld b/dsppp/linker_scripts/clang_m4_mps3.ld new file mode 100644 index 00000000..40f955c1 --- /dev/null +++ b/dsppp/linker_scripts/clang_m4_mps3.ld @@ -0,0 +1,353 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright © 2019 Keith Packard + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx!w) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx!w) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx!w) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx!w) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (w!rx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (w!rx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (w!rx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (w!rx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +ENTRY(Reset_Handler) + +PHDRS +{ + text PT_LOAD; + ram PT_LOAD; + ram_init PT_LOAD; + tls PT_TLS; +} + +SECTIONS +{ + .init : { + KEEP (*(.vectors)) + KEEP (*(.text.init.enter)) + KEEP (*(.data.init.enter)) + KEEP (*(SORT_BY_NAME(.init) SORT_BY_NAME(.init.*))) + } >ROM0 AT>ROM0 :text + + .text : { + + /* code */ + *(.text.unlikely .text.unlikely.*) + *(.text.startup .text.startup.*) + *(.text .text.* .opd .opd.*) + *(.gnu.linkonce.t.*) + KEEP (*(.fini .fini.*)) + __text_end = .; + + PROVIDE (__etext = __text_end); + PROVIDE (_etext = __text_end); + PROVIDE (etext = __text_end); + + /* read-only data */ + *(.rdata) + *(.rodata .rodata.*) + *(.gnu.linkonce.r.*) + + *(.srodata.cst16) + *(.srodata.cst8) + *(.srodata.cst4) + *(.srodata.cst2) + *(.srodata .srodata.*) + *(.data.rel.ro .data.rel.ro.*) + 
*(.got .got.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + /* lists of constructors and destructors */ + PROVIDE_HIDDEN ( __preinit_array_start = . ); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN ( __preinit_array_end = . ); + + PROVIDE_HIDDEN ( __init_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) + KEEP (*(.init_array .ctors)) + PROVIDE_HIDDEN ( __init_array_end = . ); + + PROVIDE_HIDDEN ( __fini_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) + KEEP (*(.fini_array .dtors)) + PROVIDE_HIDDEN ( __fini_array_end = . ); + + } >ROM0 AT>ROM0 :text + + .toc : { + *(.toc .toc.*) + } >ROM0 AT>ROM0 :text + + /* additional sections when compiling with C++ exception support */ + + .except_ordered : { + *(.gcc_except_table *.gcc_except_table.*) + KEEP (*(.eh_frame .eh_frame.*)) + *(.ARM.extab* .gnu.linkonce.armextab.*) + } >ROM0 AT>ROM0 :text + + .except_unordered : { + . = ALIGN(8); + + PROVIDE(__exidx_start = .); + *(.ARM.exidx*) + PROVIDE(__exidx_end = .); + } >ROM0 AT>ROM0 :text + + + /* + * Data values which are preserved across reset + */ + .preserve (NOLOAD) : { + PROVIDE(__preserve_start__ = .); + KEEP(*(SORT_BY_NAME(.preserve.*))) + KEEP(*(.preserve)) + PROVIDE(__preserve_end__ = .); + } >RAM0 AT>RAM0 :ram + + .data : { + *(.data .data.*) + *(.gnu.linkonce.d.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + PROVIDE( __global_pointer$ = . + 0x800 ); + *(.sdata .sdata.* .sdata2.*) + *(.gnu.linkonce.s.*) + } >RAM0 AT>ROM0 :ram_init + PROVIDE(__data_start = ADDR(.data)); + PROVIDE(__data_source = LOADADDR(.data)); + + /* Thread local initialized data. This gets + * space allocated as it is expected to be placed + * in ram to be used as a template for TLS data blocks + * allocated at runtime. 
We're slightly abusing that + * by placing the data in flash where it will be copied + * into the allocate ram addresses by the existing + * data initialization code in crt0 + */ + .tdata : { + *(.tdata .tdata.* .gnu.linkonce.td.*) + PROVIDE(__data_end = .); + PROVIDE(__tdata_end = .); + } >RAM0 AT>ROM0 :tls :ram_init + PROVIDE( __tls_base = ADDR(.tdata)); + PROVIDE( __tdata_start = ADDR(.tdata)); + PROVIDE( __tdata_source = LOADADDR(.tdata) ); + PROVIDE( __tdata_source_end = LOADADDR(.tdata) + SIZEOF(.tdata) ); + PROVIDE( __data_source_end = __tdata_source_end ); + PROVIDE( __tdata_size = SIZEOF(.tdata) ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata),ALIGNOF(.tbss)) ); + + PROVIDE( __edata = __data_end ); + PROVIDE( _edata = __data_end ); + PROVIDE( edata = __data_end ); + PROVIDE( __data_size = __data_end - __data_start ); + PROVIDE( __data_source_size = __data_source_end - __data_source ); + + .tbss (NOLOAD) : { + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + PROVIDE( __tls_end = . ); + PROVIDE( __tbss_end = . ); + } >RAM0 AT>RAM0 :tls :ram + PROVIDE( __bss_start = ADDR(.tbss)); + PROVIDE( __tbss_start = ADDR(.tbss)); + PROVIDE( __tbss_offset = ADDR(.tbss) - ADDR(.tdata) ); + PROVIDE( __tbss_size = SIZEOF(.tbss) ); + PROVIDE( __tls_size = __tls_end - __tls_base ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + PROVIDE( __arm64_tls_tcb_offset = MAX(16, __tls_align) ); + + /* + * The linker special cases .tbss segments which are + * identified as segments which are not loaded and are + * thread_local. + * + * For these segments, the linker does not advance 'dot' + * across them. We actually need memory allocated for tbss, + * so we create a special segment here just to make room + */ + /* + .tbss_space (NOLOAD) : { + . = ADDR(.tbss); + . = . 
+ SIZEOF(.tbss); + } >RAM0 AT>RAM0 :ram + */ + + .bss (NOLOAD) : { + *(.sbss*) + *(.gnu.linkonce.sb.*) + *(.bss .bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + + /* Align the heap */ + . = ALIGN(8); + __bss_end = .; + } >RAM0 AT>RAM0 :ram + PROVIDE( __non_tls_bss_start = ADDR(.bss) ); + PROVIDE( __end = __bss_end ); + PROVIDE( _end = __bss_end ); + PROVIDE( end = __bss_end ); + PROVIDE( __bss_size = __bss_end - __bss_start ); + + /* Make the rest of memory available for heap storage */ + PROVIDE (__heap_start = __end); +#ifdef __HEAP_SIZE + PROVIDE (__heap_end = __heap_start + __HEAP_SIZE); + PROVIDE (__heap_size = __HEAP_SIZE); +#else + PROVIDE (__heap_end = __stack - __STACK_SIZE); + PROVIDE (__heap_size = __heap_end - __heap_start); +#endif + .heap (NOLOAD) : { + . += __heap_size; + } >RAM0 :ram + + /* Define a stack region to make sure it fits in memory */ + PROVIDE(__stack = ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE); + PROVIDE(__stack_limit = __stack - __STACK_SIZE); + .stack (__stack_limit) (NOLOAD) : { + . += __STACK_SIZE; + } >RAM0 :ram + +#if __STACKSEAL_SIZE > 0 + PROVIDE(__stack_seal = __stack); + .stackseal (__stack) (NOLOAD) : + { + . += __STACKSEAL_SIZE; + } >RAM0 :ram +#endif + + /* Throw away C++ exception handling information */ + + /* + + /DISCARD/ : { + *(.note .note.*) + *(.eh_frame .eh_frame.*) + *(.ARM.extab* .gnu.linkonce.armextab.*) + *(.ARM.exidx*) + } + + */ + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1. 
*/ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions. */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2. */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2. */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions. */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3. */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + /* DWARF 5. */ + .debug_addr 0 : { *(.debug_addr) } + .debug_line_str 0 : { *(.debug_line_str) } + .debug_loclists 0 : { *(.debug_loclists) } + .debug_macro 0 : { *(.debug_macro) } + .debug_names 0 : { *(.debug_names) } + .debug_rnglists 0 : { *(.debug_rnglists) } + .debug_str_offsets 0 : { *(.debug_str_offsets) } + .debug_sup 0 : { *(.debug_sup) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } +} +/* + * Check that sections that are copied from flash to RAM have matching + * padding, so that a single memcpy() of __data_size copies the correct bytes. 
+ */ +ASSERT( __data_size == __data_source_size, + "ERROR: .data/.tdata flash size does not match RAM size"); diff --git a/dsppp/linker_scripts/clang_sse300_mps3.sct b/dsppp/linker_scripts/clang_sse300_mps3.sct new file mode 100644 index 00000000..62352193 --- /dev/null +++ b/dsppp/linker_scripts/clang_sse300_mps3.sct @@ -0,0 +1,364 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright © 2019 Keith Packard + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx!w) : ORIGIN = S_CODE_START, LENGTH = S_CODE_SIZE + 0x000000 +#if __ROM1_SIZE > 0 + ROM1 (rx!w) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx!w) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx!w) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (w!rx) : ORIGIN = S_DATA_START, LENGTH = S_DATA_SIZE + 0x000000 +#if __RAM1_SIZE > 0 + RAM1 (w!rx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (w!rx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (w!rx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +ENTRY(Reset_Handler) + +PHDRS +{ + text PT_LOAD; + ram PT_LOAD; + ram_init PT_LOAD; + tls PT_TLS; +} + +SECTIONS +{ + .init : { + KEEP (*(.vectors)) + KEEP (*(.text.init.enter)) + KEEP (*(.data.init.enter)) + KEEP (*(SORT_BY_NAME(.init) SORT_BY_NAME(.init.*))) + } >ROM0 AT>ROM0 :text + + .text : { + + /* code */ + *(.text.unlikely .text.unlikely.*) + *(.text.startup .text.startup.*) + *(.text .text.* .opd .opd.*) + *(.gnu.linkonce.t.*) + KEEP (*(.fini .fini.*)) + __text_end = .; + + PROVIDE (__etext = __text_end); + PROVIDE (_etext = __text_end); + PROVIDE (etext = __text_end); + + *(.gnu.linkonce.r.*) + + + + *(.srodata.cst16) + *(.srodata.cst8) + *(.srodata.cst4) + *(.srodata.cst2) + *(.srodata .srodata.*) + *(.data.rel.ro .data.rel.ro.*) + *(.got .got.*) + + /* Need to 
pre-align so that the symbols come after padding */ + . = ALIGN(8); + + /* lists of constructors and destructors */ + PROVIDE_HIDDEN ( __preinit_array_start = . ); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN ( __preinit_array_end = . ); + + PROVIDE_HIDDEN ( __init_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) + KEEP (*(.init_array .ctors)) + PROVIDE_HIDDEN ( __init_array_end = . ); + + PROVIDE_HIDDEN ( __fini_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) + KEEP (*(.fini_array .dtors)) + PROVIDE_HIDDEN ( __fini_array_end = . ); + + } >ROM0 AT>ROM0 :text + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + .veneers : + { + . = ALIGN(32); + KEEP(*(.gnu.sgstubs)) + } > ROM0 AT>ROM0 :text +#endif + + .toc : { + *(.toc .toc.*) + } >ROM0 AT>ROM0 :text + + /* additional sections when compiling with C++ exception support */ + + .except_ordered : { + *(.gcc_except_table *.gcc_except_table.*) + KEEP (*(.eh_frame .eh_frame.*)) + *(.ARM.extab* .gnu.linkonce.armextab.*) + } >ROM0 AT>ROM0 :text + + .except_unordered : { + . = ALIGN(8); + + PROVIDE(__exidx_start = .); + *(.ARM.exidx*) + PROVIDE(__exidx_end = .); + } >ROM0 AT>ROM0 :text + + + /* + * Data values which are preserved across reset + */ + .preserve (NOLOAD) : { + PROVIDE(__preserve_start__ = .); + KEEP(*(SORT_BY_NAME(.preserve.*))) + KEEP(*(.preserve)) + PROVIDE(__preserve_end__ = .); + } >RAM0 AT>RAM0 :ram + + .data : { + *(.data .data.*) + *(.gnu.linkonce.d.*) + + /* read-only data */ + *(.rdata) + *(.rodata .rodata.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + PROVIDE( __global_pointer$ = . + 0x800 ); + *(.sdata .sdata.* .sdata2.*) + *(.gnu.linkonce.s.*) + } >RAM0 AT>ROM0 :ram_init + PROVIDE(__data_start = ADDR(.data)); + PROVIDE(__data_source = LOADADDR(.data)); + + /* Thread local initialized data. 
This gets + * space allocated as it is expected to be placed + * in ram to be used as a template for TLS data blocks + * allocated at runtime. We're slightly abusing that + * by placing the data in flash where it will be copied + * into the allocate ram addresses by the existing + * data initialization code in crt0 + */ + .tdata : { + *(.tdata .tdata.* .gnu.linkonce.td.*) + PROVIDE(__data_end = .); + PROVIDE(__tdata_end = .); + } >RAM0 AT>ROM0 :tls :ram_init + PROVIDE( __tls_base = ADDR(.tdata)); + PROVIDE( __tdata_start = ADDR(.tdata)); + PROVIDE( __tdata_source = LOADADDR(.tdata) ); + PROVIDE( __tdata_source_end = LOADADDR(.tdata) + SIZEOF(.tdata) ); + PROVIDE( __data_source_end = __tdata_source_end ); + PROVIDE( __tdata_size = SIZEOF(.tdata) ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata),ALIGNOF(.tbss)) ); + + PROVIDE( __edata = __data_end ); + PROVIDE( _edata = __data_end ); + PROVIDE( edata = __data_end ); + PROVIDE( __data_size = __data_end - __data_start ); + PROVIDE( __data_source_size = __data_source_end - __data_source ); + + .tbss (NOLOAD) : { + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + PROVIDE( __tls_end = . ); + PROVIDE( __tbss_end = . ); + } >RAM0 AT>RAM0 :tls :ram + PROVIDE( __bss_start = ADDR(.tbss)); + PROVIDE( __tbss_start = ADDR(.tbss)); + PROVIDE( __tbss_offset = ADDR(.tbss) - ADDR(.tdata) ); + PROVIDE( __tbss_size = SIZEOF(.tbss) ); + PROVIDE( __tls_size = __tls_end - __tls_base ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + PROVIDE( __arm64_tls_tcb_offset = MAX(16, __tls_align) ); + + /* + * The linker special cases .tbss segments which are + * identified as segments which are not loaded and are + * thread_local. + * + * For these segments, the linker does not advance 'dot' + * across them. We actually need memory allocated for tbss, + * so we create a special segment here just to make room + */ + /* + .tbss_space (NOLOAD) : { + . = ADDR(.tbss); + . 
= . + SIZEOF(.tbss); + } >RAM0 AT>RAM0 :ram + */ + + .bss (NOLOAD) : { + *(.sbss*) + *(.gnu.linkonce.sb.*) + *(.bss .bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + + /* Align the heap */ + . = ALIGN(8); + __bss_end = .; + } >RAM0 AT>RAM0 :ram + PROVIDE( __non_tls_bss_start = ADDR(.bss) ); + PROVIDE( __end = __bss_end ); + PROVIDE( _end = __bss_end ); + PROVIDE( end = __bss_end ); + PROVIDE( __bss_size = __bss_end - __bss_start ); + + /* Make the rest of memory available for heap storage */ + PROVIDE (__heap_start = __end); +#ifdef HEAP_SIZE + PROVIDE (__heap_end = __heap_start + HEAP_SIZE); + PROVIDE (__heap_size = HEAP_SIZE); +#else + PROVIDE (__heap_end = __stack - STACK_SIZE); + PROVIDE (__heap_size = __heap_end - __heap_start); +#endif + .heap (NOLOAD) : { + . += __heap_size; + } >RAM0 :ram + + /* Define a stack region to make sure it fits in memory */ + PROVIDE(__stack = ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE); + PROVIDE(__stack_limit = __stack - STACK_SIZE); + .stack (__stack_limit) (NOLOAD) : { + . += STACK_SIZE; + } >RAM0 :ram + +#if __STACKSEAL_SIZE > 0 + PROVIDE(__stack_seal = __stack); + .stackseal (__stack) (NOLOAD) : + { + . += __STACKSEAL_SIZE; + } >RAM0 :ram +#endif + + /* Throw away C++ exception handling information */ + + /* + + /DISCARD/ : { + *(.note .note.*) + *(.eh_frame .eh_frame.*) + *(.ARM.extab* .gnu.linkonce.armextab.*) + *(.ARM.exidx*) + } + + */ + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1. */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions. 
*/ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2. */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2. */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions. */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3. */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + /* DWARF 5. */ + .debug_addr 0 : { *(.debug_addr) } + .debug_line_str 0 : { *(.debug_line_str) } + .debug_loclists 0 : { *(.debug_loclists) } + .debug_macro 0 : { *(.debug_macro) } + .debug_names 0 : { *(.debug_names) } + .debug_rnglists 0 : { *(.debug_rnglists) } + .debug_str_offsets 0 : { *(.debug_str_offsets) } + .debug_sup 0 : { *(.debug_sup) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } +} +/* + * Check that sections that are copied from flash to RAM have matching + * padding, so that a single memcpy() of __data_size copies the correct bytes. 
+ */ +ASSERT( __data_size == __data_source_size, + "ERROR: .data/.tdata flash size does not match RAM size"); diff --git a/dsppp/linker_scripts/clang_sse310_mps3.sct b/dsppp/linker_scripts/clang_sse310_mps3.sct new file mode 100644 index 00000000..3f487716 --- /dev/null +++ b/dsppp/linker_scripts/clang_sse310_mps3.sct @@ -0,0 +1,363 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright © 2019 Keith Packard + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx!w) : ORIGIN = S_CODE_START, LENGTH = S_CODE_SIZE + 0x000000 +#if __ROM1_SIZE > 0 + ROM1 (rx!w) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx!w) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx!w) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (w!rx) : ORIGIN = S_DATA_START, LENGTH = S_DATA_SIZE + 0x000000 +#if __RAM1_SIZE > 0 + RAM1 (w!rx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (w!rx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (w!rx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +ENTRY(Reset_Handler) + +PHDRS +{ + text PT_LOAD; + ram PT_LOAD; + ram_init PT_LOAD; + tls PT_TLS; +} + +SECTIONS +{ + .init : { + KEEP (*(.vectors)) + KEEP (*(.text.init.enter)) + KEEP (*(.data.init.enter)) + KEEP (*(SORT_BY_NAME(.init) SORT_BY_NAME(.init.*))) + } >ROM0 AT>ROM0 :text + + .text : { + + /* code */ + *(.text.unlikely .text.unlikely.*) + *(.text.startup .text.startup.*) + *(.text .text.* .opd .opd.*) + *(.gnu.linkonce.t.*) + KEEP (*(.fini .fini.*)) + __text_end = .; + + PROVIDE (__etext = __text_end); + PROVIDE (_etext = __text_end); + PROVIDE (etext = __text_end); + + *(.gnu.linkonce.r.*) + + + *(.srodata.cst16) + *(.srodata.cst8) + *(.srodata.cst4) + *(.srodata.cst2) + *(.srodata .srodata.*) + *(.data.rel.ro .data.rel.ro.*) + *(.got .got.*) + + /* Need to 
pre-align so that the symbols come after padding */ + . = ALIGN(8); + + /* lists of constructors and destructors */ + PROVIDE_HIDDEN ( __preinit_array_start = . ); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN ( __preinit_array_end = . ); + + PROVIDE_HIDDEN ( __init_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) + KEEP (*(.init_array .ctors)) + PROVIDE_HIDDEN ( __init_array_end = . ); + + PROVIDE_HIDDEN ( __fini_array_start = . ); + KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) + KEEP (*(.fini_array .dtors)) + PROVIDE_HIDDEN ( __fini_array_end = . ); + + } >ROM0 AT>ROM0 :text + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + .veneers : + { + . = ALIGN(32); + KEEP(*(.gnu.sgstubs)) + } > ROM0 AT>ROM0 :text +#endif + + .toc : { + *(.toc .toc.*) + } >ROM0 AT>ROM0 :text + + /* additional sections when compiling with C++ exception support */ + + .except_ordered : { + *(.gcc_except_table *.gcc_except_table.*) + KEEP (*(.eh_frame .eh_frame.*)) + *(.ARM.extab* .gnu.linkonce.armextab.*) + } >ROM0 AT>ROM0 :text + + .except_unordered : { + . = ALIGN(8); + + PROVIDE(__exidx_start = .); + *(.ARM.exidx*) + PROVIDE(__exidx_end = .); + } >ROM0 AT>ROM0 :text + + + /* + * Data values which are preserved across reset + */ + .preserve (NOLOAD) : { + PROVIDE(__preserve_start__ = .); + KEEP(*(SORT_BY_NAME(.preserve.*))) + KEEP(*(.preserve)) + PROVIDE(__preserve_end__ = .); + } >RAM0 AT>RAM0 :ram + + .data : { + *(.data .data.*) + *(.gnu.linkonce.d.*) + + /* read-only data */ + *(.rdata) + *(.rodata .rodata.*) + + /* Need to pre-align so that the symbols come after padding */ + . = ALIGN(8); + + PROVIDE( __global_pointer$ = . + 0x800 ); + *(.sdata .sdata.* .sdata2.*) + *(.gnu.linkonce.s.*) + } >RAM0 AT>ROM0 :ram_init + PROVIDE(__data_start = ADDR(.data)); + PROVIDE(__data_source = LOADADDR(.data)); + + /* Thread local initialized data. 
This gets + * space allocated as it is expected to be placed + * in ram to be used as a template for TLS data blocks + * allocated at runtime. We're slightly abusing that + * by placing the data in flash where it will be copied + * into the allocate ram addresses by the existing + * data initialization code in crt0 + */ + .tdata : { + *(.tdata .tdata.* .gnu.linkonce.td.*) + PROVIDE(__data_end = .); + PROVIDE(__tdata_end = .); + } >RAM0 AT>ROM0 :tls :ram_init + PROVIDE( __tls_base = ADDR(.tdata)); + PROVIDE( __tdata_start = ADDR(.tdata)); + PROVIDE( __tdata_source = LOADADDR(.tdata) ); + PROVIDE( __tdata_source_end = LOADADDR(.tdata) + SIZEOF(.tdata) ); + PROVIDE( __data_source_end = __tdata_source_end ); + PROVIDE( __tdata_size = SIZEOF(.tdata) ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata),ALIGNOF(.tbss)) ); + + PROVIDE( __edata = __data_end ); + PROVIDE( _edata = __data_end ); + PROVIDE( edata = __data_end ); + PROVIDE( __data_size = __data_end - __data_start ); + PROVIDE( __data_source_size = __data_source_end - __data_source ); + + .tbss (NOLOAD) : { + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + PROVIDE( __tls_end = . ); + PROVIDE( __tbss_end = . ); + } >RAM0 AT>RAM0 :tls :ram + PROVIDE( __bss_start = ADDR(.tbss)); + PROVIDE( __tbss_start = ADDR(.tbss)); + PROVIDE( __tbss_offset = ADDR(.tbss) - ADDR(.tdata) ); + PROVIDE( __tbss_size = SIZEOF(.tbss) ); + PROVIDE( __tls_size = __tls_end - __tls_base ); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + PROVIDE( __arm64_tls_tcb_offset = MAX(16, __tls_align) ); + + /* + * The linker special cases .tbss segments which are + * identified as segments which are not loaded and are + * thread_local. + * + * For these segments, the linker does not advance 'dot' + * across them. We actually need memory allocated for tbss, + * so we create a special segment here just to make room + */ + /* + .tbss_space (NOLOAD) : { + . = ADDR(.tbss); + . 
= . + SIZEOF(.tbss); + } >RAM0 AT>RAM0 :ram + */ + + .bss (NOLOAD) : { + *(.sbss*) + *(.gnu.linkonce.sb.*) + *(.bss .bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + + /* Align the heap */ + . = ALIGN(8); + __bss_end = .; + } >RAM0 AT>RAM0 :ram + PROVIDE( __non_tls_bss_start = ADDR(.bss) ); + PROVIDE( __end = __bss_end ); + PROVIDE( _end = __bss_end ); + PROVIDE( end = __bss_end ); + PROVIDE( __bss_size = __bss_end - __bss_start ); + + /* Make the rest of memory available for heap storage */ + PROVIDE (__heap_start = __end); +#ifdef HEAP_SIZE + PROVIDE (__heap_end = __heap_start + HEAP_SIZE); + PROVIDE (__heap_size = HEAP_SIZE); +#else + PROVIDE (__heap_end = __stack - STACK_SIZE); + PROVIDE (__heap_size = __heap_end - __heap_start); +#endif + .heap (NOLOAD) : { + . += __heap_size; + } >RAM0 :ram + + /* Define a stack region to make sure it fits in memory */ + PROVIDE(__stack = ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE); + PROVIDE(__stack_limit = __stack - STACK_SIZE); + .stack (__stack_limit) (NOLOAD) : { + . += STACK_SIZE; + } >RAM0 :ram + +#if __STACKSEAL_SIZE > 0 + PROVIDE(__stack_seal = __stack); + .stackseal (__stack) (NOLOAD) : + { + . += __STACKSEAL_SIZE; + } >RAM0 :ram +#endif + + /* Throw away C++ exception handling information */ + + /* + + /DISCARD/ : { + *(.note .note.*) + *(.eh_frame .eh_frame.*) + *(.ARM.extab* .gnu.linkonce.armextab.*) + *(.ARM.exidx*) + } + + */ + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1. */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions. 
*/ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2. */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2. */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions. */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3. */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + /* DWARF 5. */ + .debug_addr 0 : { *(.debug_addr) } + .debug_line_str 0 : { *(.debug_line_str) } + .debug_loclists 0 : { *(.debug_loclists) } + .debug_macro 0 : { *(.debug_macro) } + .debug_names 0 : { *(.debug_names) } + .debug_rnglists 0 : { *(.debug_rnglists) } + .debug_str_offsets 0 : { *(.debug_str_offsets) } + .debug_sup 0 : { *(.debug_sup) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } +} +/* + * Check that sections that are copied from flash to RAM have matching + * padding, so that a single memcpy() of __data_size copies the correct bytes. + */ +ASSERT( __data_size == __data_source_size, + "ERROR: .data/.tdata flash size does not match RAM size"); diff --git a/dsppp/linker_scripts/gcc_m0p_mps3.ld b/dsppp/linker_scripts/gcc_m0p_mps3.ld new file mode 100644 index 00000000..a018e5d4 --- /dev/null +++ b/dsppp/linker_scripts/gcc_m0p_mps3.ld @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (rwx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (rwx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (rwx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (rwx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. 
+ * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > ROM0 + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + .gnu.sgstubs : + { + . = ALIGN(32); + } > ROM0 +#endif + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > ROM0 + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > ROM0 + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > ROM0 + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. 
+ LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > ROM0 + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM0 AT > ROM0 + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM1 AT > ROM0 +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM0 AT > RAM0 + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM1 AT > RAM1 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . 
= ALIGN(8); + __HeapLimit = .; + } > RAM0 + + .stack (ORIGIN(RAM0) + LENGTH(RAM0) - __STACK_SIZE - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM0 + PROVIDE(__stack = __StackTop); + +#if __STACKSEAL_SIZE > 0 + .stackseal (ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackSeal = .; + . = . + 8; + . = ALIGN(8); + } > RAM0 +#endif + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/linker_scripts/gcc_m4_mps3.ld b/dsppp/linker_scripts/gcc_m4_mps3.ld new file mode 100644 index 00000000..a018e5d4 --- /dev/null +++ b/dsppp/linker_scripts/gcc_m4_mps3.ld @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx) : ORIGIN = __ROM0_BASE, LENGTH = __ROM0_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (rwx) : ORIGIN = __RAM0_BASE, LENGTH = __RAM0_SIZE +#if __RAM1_SIZE > 0 + RAM1 (rwx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (rwx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (rwx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. 
+ * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > ROM0 + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + .gnu.sgstubs : + { + . = ALIGN(32); + } > ROM0 +#endif + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > ROM0 + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > ROM0 + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > ROM0 + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. 
+ LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > ROM0 + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM0 AT > ROM0 + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM1 AT > ROM0 +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM0 AT > RAM0 + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM1 AT > RAM1 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . 
= ALIGN(8); + __HeapLimit = .; + } > RAM0 + + .stack (ORIGIN(RAM0) + LENGTH(RAM0) - __STACK_SIZE - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM0 + PROVIDE(__stack = __StackTop); + +#if __STACKSEAL_SIZE > 0 + .stackseal (ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackSeal = .; + . = . + 8; + . = ALIGN(8); + } > RAM0 +#endif + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/linker_scripts/gcc_sse300_mps3.ld b/dsppp/linker_scripts/gcc_sse300_mps3.ld new file mode 100644 index 00000000..e00625ea --- /dev/null +++ b/dsppp/linker_scripts/gcc_sse300_mps3.ld @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx) : ORIGIN = S_CODE_START, LENGTH = S_CODE_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (rw) : ORIGIN = S_DATA_START, LENGTH = S_DATA_SIZE +#if __RAM1_SIZE > 0 + RAM1 (rw) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (rw) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (rw) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. 
+ * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + /* *(.rodata*) */ + + KEEP(*(.eh_frame*)) + } > ROM0 + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + .gnu.sgstubs : + { + . = ALIGN(32); + } > ROM0 +#endif + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > ROM0 + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > ROM0 + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > ROM0 + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. 
+ LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > ROM0 + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + *(.rodata*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM0 AT > ROM0 + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM1 AT > ROM0 +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM0 AT > RAM0 + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM1 AT > RAM1 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + HEAP_SIZE; + . 
= ALIGN(8); + __HeapLimit = .; + } > RAM0 + + .stack (ORIGIN(RAM0) + LENGTH(RAM0) - STACK_SIZE - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM0 + PROVIDE(__stack = __StackTop); + +#if __STACKSEAL_SIZE > 0 + .stackseal (ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackSeal = .; + . = . + 8; + . = ALIGN(8); + } > RAM0 +#endif + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/linker_scripts/gcc_sse310_mps3_s.ld b/dsppp/linker_scripts/gcc_sse310_mps3_s.ld new file mode 100644 index 00000000..7bea37e1 --- /dev/null +++ b/dsppp/linker_scripts/gcc_sse310_mps3_s.ld @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2023 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* ---------------------------------------------------------------------------- + Stack seal size definition + *----------------------------------------------------------------------------*/ +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) +#define __STACKSEAL_SIZE ( 8 ) +#else +#define __STACKSEAL_SIZE ( 0 ) +#endif + +/* ---------------------------------------------------------------------------- + Memory definition + *----------------------------------------------------------------------------*/ +MEMORY +{ + ROM0 (rx) : ORIGIN = S_CODE_START, LENGTH = S_CODE_SIZE +#if __ROM1_SIZE > 0 + ROM1 (rx) : ORIGIN = __ROM1_BASE, LENGTH = __ROM1_SIZE +#endif +#if __ROM2_SIZE > 0 + ROM2 (rx) : ORIGIN = __ROM2_BASE, LENGTH = __ROM2_SIZE +#endif +#if __ROM3_SIZE > 0 + ROM3 (rx) : ORIGIN = __ROM3_BASE, LENGTH = __ROM3_SIZE +#endif + + RAM0 (rwx) : ORIGIN = S_DATA_START, LENGTH = S_DATA_SIZE +#if __RAM1_SIZE > 0 + RAM1 (rwx) : ORIGIN = __RAM1_BASE, LENGTH = __RAM1_SIZE +#endif +#if __RAM2_SIZE > 0 + RAM2 (rwx) : ORIGIN = __RAM2_BASE, LENGTH = __RAM2_SIZE +#endif +#if __RAM3_SIZE > 0 + RAM3 (rwx) : ORIGIN = __RAM3_BASE, LENGTH = __RAM3_SIZE +#endif +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions FLASH and RAM. 
+ * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext (deprecated) + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + /* *(.rodata*) */ + + KEEP(*(.eh_frame*)) + } > ROM0 + +#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) + .gnu.sgstubs : + { + . = ALIGN(32); + } > ROM0 +#endif + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > ROM0 + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > ROM0 + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + + LONG (LOADADDR(.data)) + LONG (ADDR(.data)) + LONG (SIZEOF(.data) / 4) + + /* Add each additional data section here */ +/* + LONG (LOADADDR(.data2)) + LONG (ADDR(.data2)) + LONG (SIZEOF(.data2) / 4) +*/ + __copy_table_end__ = .; + } > ROM0 + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + +/* .bss initialization to zero is already done during C Run-Time Startup. 
+ LONG (ADDR(.bss)) + LONG (SIZEOF(.bss) / 4) +*/ + + /* Add each additional bss section here */ +/* + LONG (ADDR(.bss2)) + LONG (SIZEOF(.bss2) / 4) +*/ + __zero_table_end__ = .; + } > ROM0 + + /* + * This __etext variable is kept for backward compatibility with older, + * ASM based startup files. + */ + PROVIDE(__etext = LOADADDR(.data)); + + .data : ALIGN(4) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + *(.rodata*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > RAM0 AT > ROM0 + + /* + * Secondary data section, optional + * + * Remember to add each additional data section + * to the .copy.table above to assure proper + * initialization during startup. + */ +/* + .data2 : ALIGN(4) + { + . = ALIGN(4); + __data2_start__ = .; + *(.data2) + *(.data2.*) + . = ALIGN(4); + __data2_end__ = .; + + } > RAM1 AT > ROM0 +*/ + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > RAM0 AT > RAM0 + + /* + * Secondary bss section, optional + * + * Remember to add each additional bss section + * to the .zero.table above to assure proper + * initialization during startup. + */ +/* + .bss2 : + { + . = ALIGN(4); + __bss2_start__ = .; + *(.bss2) + *(.bss2.*) + . = ALIGN(4); + __bss2_end__ = .; + } > RAM1 AT > RAM1 +*/ + + .heap (NOLOAD) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + HEAP_SIZE; + . 
= ALIGN(8); + __HeapLimit = .; + } > RAM0 + + .stack (ORIGIN(RAM0) + LENGTH(RAM0) - STACK_SIZE - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > RAM0 + PROVIDE(__stack = __StackTop); + +#if __STACKSEAL_SIZE > 0 + .stackseal (ORIGIN(RAM0) + LENGTH(RAM0) - __STACKSEAL_SIZE) (NOLOAD) : + { + . = ALIGN(8); + __StackSeal = .; + . = . + 8; + . = ALIGN(8); + } > RAM0 +#endif + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack") +} diff --git a/dsppp/main.c b/dsppp/main.c new file mode 100644 index 00000000..3a7cdcef --- /dev/null +++ b/dsppp/main.c @@ -0,0 +1,93 @@ +#include "test_config.h" +#include "RTE_Components.h" +#include CMSIS_device_header +#include "stdio.h" + +#if defined(MPS3) +#include "cmsis_driver_config.h" +#include "stdout_USART.h" +#endif + +#if defined(RTE_Compiler_EventRecorder) +#include "EventRecorder.h" +#endif + +#include "test.h" + + +int main(void) +{ +#if defined(MPS3) + stdout_init(); +#endif + +#if defined(RTE_Compiler_EventRecorder) && !defined(MPS3) + uint32_t res = EventRecorderInitialize (EventRecordAll, 1); + if (!res) + { + printf("Error enabling event recorder\n"); + goto endThread; + } +#endif + + #if !defined(SERIAL_DUMP) + printf("\033c\r\n\r\n"); + #endif + printf("\r\n\r\n\r\n----------------------\r\n"); + printf(__TIME__"\r\n"); + #if defined(ARMCM55) + printf("M55\r\n"); + #endif + #if defined(ARMCM4_FP) + printf("ARMCM4_FP\r\n"); + #endif + #if defined(ARMCM0P) + printf("ARMCM0P\r\n"); + #endif + + #if defined(MPS3) + printf("MPS3\r\n"); + #endif + #if defined(VHT) + printf("VHT\r\n"); + #endif + #if defined(IPSS) + printf("IPSS\r\n"); + #endif + + #if defined(DOT_TEST) + dot_test(); + #endif + #if defined(VECTOR_TEST) + vector_test(); + #endif + #if defined(ROW_TEST) + row_test(); + #endif + #if defined(COL_TEST) + col_test(); + #endif + #if defined(MATRIX_TEST) + 
matrix_test(); + #endif + #if 0 + filter_test(); + #endif + #if defined(FUSION_TEST) + fusion_test(); + #endif + //debug_test(); + + memory_pool_stats(); + +#if defined(MPS3) + while(1); +#else +#if defined(RTE_Compiler_EventRecorder) +endThread: +#endif + while(0); +#endif +} + + diff --git a/dsppp/mps3run.py b/dsppp/mps3run.py new file mode 100644 index 00000000..799e7145 --- /dev/null +++ b/dsppp/mps3run.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +from pyocd.core.helpers import ConnectHelper +from pyocd.flash.file_programmer import FileProgrammer +from pyocd.debug.elf.symbols import ELFSymbolProvider +from pyocd.core.target import Target +from pyocd.debug.elf.elf import ELFBinaryFile +from pyocd.flash.loader import MemoryLoader +import getserial + +import time +import os.path + +import serial +import re +import io + +import logging +logging.basicConfig(level=logging.ERROR) + + + +def run_out(exe_path,uuid): + lines= "" + + with ConnectHelper.session_with_chosen_probe(unique_id = uuid) as session: + print("Connecting") + board = session.board + target = board.target + #flash = target.memory_map.get_boot_memory() + + # Load firmware into device. + FileProgrammer(session).program(exe_path) + + #target.elf = elf_path + + + #provider = ELFSymbolProvider(target.elf) + #main_addr = provider.get_symbol_value("main") + #print("main() address: 0x%X" % main_addr) + + ## Set breakpoint. + #target.set_breakpoint(main_addr) + + #target.reset() + lines = getserial.read_stdout(target) + return("".join(lines)) + #target.resume() + ## + ## + #target.reset() + ## + ### Wait until breakpoint is hit. + #while target.get_state() != Target.State.HALTED: + # pass + ## + #pc = target.read_core_register("pc") + #print("pc: 0x%X" % pc) + # + #target.remove_breakpoint() + # + #target.resume() + +if __name__ == "__main__": + path = "." 
+ out = "cprj/out/test/MPS3-Corstone-300" + bin = "Release/test.axf" + + axf_path = os.path.join(path,out,bin) + + #axf=ELFBinaryFile(axf_path) + #axf.close() + + lines = run_out(axf_path,"L85986697A") + + + print(lines) + + \ No newline at end of file diff --git a/dsppp/process.py b/dsppp/process.py new file mode 100644 index 00000000..be7aff9f --- /dev/null +++ b/dsppp/process.py @@ -0,0 +1,137 @@ +import re +import xlsxwriter + +START = 0 +IN_TEST = 1 +MEASURE = 2 +CYCLE_CPP = 3 +CYCLE_C = 4 +ERROR = 5 + +line_nb = 0 +state = START +dimensions = "?" + +cpp = 0 +c = 0 + +stats = {} + +with open("result.txt","r") as f: + lines = f.readlines() + for l in lines: + if line_nb >= 3: + if re.match('Error',l): + state = ERROR + continue + if state == ERROR: + state = IN_TEST + continue + if state == START: + if re.match(r'^[a-zA-Z]+.*$',l): + #print(l) + test_name = l.strip("\n") + state = IN_TEST + stats[test_name]=[] + continue + if state == IN_TEST: + if re.match(r'----',l): + state = MEASURE + continue + if re.match(r'^[a-zA-Z]+.*$',l): + state = IN_TEST + test_name = l.strip("\n") + stats[test_name]=[] + continue + if state == MEASURE: + dimensions = l.strip("\n") + state = CYCLE_CPP + continue + if state == CYCLE_CPP: + m = re.match(r'Cycle count = ([0-9]+)',l) + if m: + cpp = m.group(1) + state = CYCLE_C + continue + if state == CYCLE_C: + if re.match(r'----',l): + state = MEASURE + stats[test_name].append({"dim":dimensions,"cpp":cpp}) + continue + m = re.match(r'Cycle count = ([0-9]+)',l) + if m: + c = m.group(1) + state = IN_TEST + stats[test_name].append({"dim":dimensions,"cpp":cpp,"c":c}) + continue + else: + stats[test_name].append({"dim":dimensions,"cpp":cpp}) + state = IN_TEST + continue + + + + + + line_nb = line_nb + 1 + +dst="C:/Users/CHRFAV01/OneDrive - ARM/Documents/Presentations/CMSIS_Compute" + +def pos(row,col): + return(f"{chr(ord('A')+col)}{row}") + +for s in stats: + ns = re.sub(r'[ ]',"_",s) + ".xlsx" + print(ns) + workbook = 
xlsxwriter.Workbook(dst+"/"+ns) + worksheet = workbook.add_worksheet("Results") + line_nb = 0 + + title = workbook.add_format({'bold': True,'font_size':24}) + sub_title = workbook.add_format({'bold': True, + 'font_size':14, + 'align':"center", + 'bg_color':"#CCCCCC"}) + percent = workbook.add_format({'num_format': '0.00%'}) + dimEven = workbook.add_format({'bold': True,'bg_color':"#CCCCCC"}) + dimOdd = workbook.add_format({'bold': True,'bg_color':"#EEEEEE"}) + + worksheet.write(line_nb,0, s,title) + line_nb = line_nb + 1 + + worksheet.set_row(line_nb, 30) + worksheet.set_column("D:D", 30) + + if len(stats[s])==2: + worksheet.write(line_nb,0, 'dims',sub_title) + worksheet.write(line_nb,1, 'cpp',sub_title) + worksheet.write(line_nb, 2, 'CPP Improvement',sub_title) + + else: + worksheet.write(line_nb,0, 'dims',sub_title) + worksheet.write(line_nb,1, 'cpp',sub_title) + worksheet.write(line_nb,2, 'c',sub_title) + worksheet.write(line_nb, 3, 'CPP Improvement',sub_title) + + line_nb = line_nb + 1 + for x in stats[s]: + if (line_nb % 2 == 0): + dim = dimOdd + else: + dim = dimEven + if "c" in x: + worksheet.write(line_nb,0, x["dim"],dim) + worksheet.write(line_nb,1, float(x["cpp"])) + worksheet.write(line_nb,2, float(x["c"])) + worksheet.write(line_nb, 3, f"=(C{line_nb+1}-B{line_nb+1})/C{line_nb+1}",percent) + else: + worksheet.write(line_nb,0, x["dim"],dim) + worksheet.write(line_nb,1, float(x["cpp"])) + worksheet.write(line_nb, 2, f"=(C{line_nb+1}-B{line_nb+1})/C{line_nb+1}",percent) + + line_nb = line_nb + 1 + + + + workbook.close() + \ No newline at end of file diff --git a/dsppp/run_all.py b/dsppp/run_all.py new file mode 100644 index 00000000..50299044 --- /dev/null +++ b/dsppp/run_all.py @@ -0,0 +1,391 @@ +import re +import argparse +import os.path +import itertools +import subprocess +import sys +import mps3run + +from colorama import init,Fore, Back, Style + +try: + os.mkdir("ac6_results") +except: + pass + +try: + os.mkdir("gcc_results") +except: + pass + +try: + 
os.mkdir("clang_results") +except: + pass + +DEBUG = False +ERROR_OCCURED = False + +all_errors = [] + +def printTitle(s): + print("\n" + Fore.GREEN + Style.BRIGHT + s + Style.RESET_ALL) + +def printSubTitle(s): + print(Fore.YELLOW + Style.BRIGHT + s + Style.RESET_ALL) + +def printError(s): + print(Fore.RED + Style.BRIGHT + s + Style.RESET_ALL+"\n") + +class Result: + def __init__(self,msg,error=False): + self._error = error + self._msg = msg + + @property + def error(self): + return self._error + + @property + def msg(self): + return self._msg + +def is_error(res,test_name,err): + if res.error: + printError("Error") + all_errors.append(test_name) + print(test_name,file=err) + print(res.msg,file=err) + print("--------------",file=err) + return(True) + return(False) + +def run(args,mustPrint=False,dumpStdErr=True,timeout=20,printCmd=False): + global ERROR_OCCURED + global DEBUG + try: + if DEBUG or printCmd: + print(" ".join(args)) + result=subprocess.run(args,text=True,capture_output=True,timeout=timeout) + if result.returncode !=0 : + ERROR_OCCURED = True + if dumpStdErr: + return(Result(result.stderr + "\n\nSTDOUT:\n\n" + result.stdout,error=True)) + else: + return(Result(result.stdout,error=True)) + + if mustPrint: + print(result.stdout) + return(Result(result.stdout)) + except Exception as e: + printError("Exception occured") + ERROR_OCCURED = True + return(Result(str(e),error=True)) + +parser = argparse.ArgumentParser(description='Parse test description') +parser.add_argument('-c', nargs='?',type = str, default="M55",help="M55/M4/M0") +parser.add_argument('-p', nargs='?',type = str, default="VHT",help="VHT/MPS3") +parser.add_argument('-a', action='store_true', help="Generate allocator definitions") +parser.add_argument('-i', action='store_true', help="Refresh global allocator index") +parser.add_argument('-b', action='store_true', help="Only benchmarks") +parser.add_argument('-d', action='store_true', help="Dry run") +parser.add_argument('-g', nargs='?',type = 
str, default="AC6",help="AC6 / CLANG / GCC") +parser.add_argument('-u', nargs='?',type = str, default="L85986697A",help="Debug UUID") + +args = parser.parse_args() + +init() + +if args.a: + printTitle("Mode allocator generations") + +if args.i: + printTitle("Allocator test index refresh") + +NAME_TO_BOARD = { + "M55": "Corstone-300", + "Corstone-300": "Corstone-300", + "M4": "M4", + "M0" : "M0P" +} + +def results(): + if args.g == "AC6": + return("ac6_results") + + if args.g == "GCC": + return("gcc_results") + + if args.g == "CLANG": + return("clang_results") + + print(f"Compiler {args.g} not known") + exit(1) + +def target_name(): + return(f"{args.p}-{NAME_TO_BOARD[args.c]}") + +def cmd_args(): + # cbuild -O cprj test.csolution.yml -r --toolchain AC6 -c test.Release+MPS3-Corstone-300 + toolchain = args.g + target = f"test.Release+{target_name()}" + + command = ["-O", "cprj", + "test.csolution.yml", + "--toolchain", toolchain, + "-c", target] + + return(command) + + + +if args.g == "AC6": + ext = ".axf" +else: + ext = ".elf" + +fvp = {"M55":"C:\\Keil_v5\\ARM\\VHT\\VHT_Corstone_SSE-300_Ethos-U55.exe", + "M4":"C:\\Keil_v5\\ARM\\VHT\\VHT_MPS2_Cortex-M4.exe", + "M0":"C:\\Keil_v5\\ARM\\VHT\\VHT_MPS2_Cortex-M0plus.exe"} + +TESTS=["DOT_TEST", + "VECTOR_TEST", + "ROW_TEST", + "COL_TEST", + "MATRIX_TEST", + "FUSION_TEST" + ] + +# Some tests are too big (code size) and needs to be decomposed +# They contain SUBTEST1, SUBTEST2 ... 
#if in the code +# This script must know how many subtests are defined in each test +# suite +# No need to define an entry in this dictionary when no +# subtest is defined +SUBTESTS = {"MATRIX_TEST":19} +# Subtests that are only for testing and not benchmarks +ONLY_TESTS = {"MATRIX_TEST":[3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19]} + +def is_only_test(n,i): + if n[0] in ONLY_TESTS: + return(i in ONLY_TESTS[n[0]]) + return False + +DATATYPES = ["F64_DT", + "F32_DT", + "F16_DT", + "Q31_DT", + "Q15_DT", + "Q7_DT" + ] + +MODE = ["STATIC_TEST", + "DYNAMIC_TEST" + ] + +# Restricted tests for debugging +#TESTS=["DOT_TEST","VECTOR_TEST"] +#DATATYPES=["F32_DT"] +#MODE = ["STATIC_TEST"] + +all_tests = list(itertools.product(TESTS,DATATYPES,MODE)) + + + +ALLOC = "#define POOL_ALLOCATOR" +if args.a: + # Stat allocator enabled and we do stats on VHT CS300 only + ALLOC = "//#define POOL_ALLOCATOR" + args.c = "M55" + args.p = "VHT" + +BENCH = "//#define ONLY_BENCHMARKS" +if args.b: + BENCH = "#define ONLY_BENCHMARKS" + +HEADER = f"""#ifndef TEST_CONFIG_H +#define TEST_CONFIG_H + +{ALLOC} +{BENCH} + +#define %s +#define %s +#define %s +%s + +#endif +""" + + + +def out_path(): + return(os.path.join("cprj","out","test",target_name(),"Release","test"+ ext)) + +def configure_and_build_test(test_name,test,err,subtest,first): + if subtest is not None: + subteststr = f"#define SUBTEST{subtest}" + else: + subteststr = "" + with open("test_config.h","w") as c: + print(HEADER % (test + (subteststr,)),file=c) + if first: + res = run(["cbuild"] + cmd_args() + ["-r","--update-rte"],timeout=600,printCmd=True) + else: + res = run(["cbuild"] +cmd_args(),timeout=600,printCmd=True) + if not is_error(res,test_name,err): + if DEBUG: + print(res.msg) + return(True) + return(False) + +def process_allocator_data(test_name,test,msg,subtest): + lines = msg.splitlines() + state = 0 + alloc_cpp = [] + alloc_h = [] + for l in lines: + if re.match(r"^ALLOC_POOL.*$",l): + alloc_cpp.append(l.strip()) + if 
re.match(r"^POOL.*$",l): + alloc_h.append(l.strip()) + if subtest is not None: + HEADER=f"#if defined({test[0]}) && defined({test[1]}) && defined({test[2]}) && defined(SUBTEST{subtest})" + else: + HEADER=f"#if defined({test[0]}) && defined({test[1]}) && defined({test[2]})" + # Gen h + with open(os.path.join("allocation",test_name)+".h","w") as h: + print(HEADER,file=h) + for l in alloc_h: + print(l,file=h) + print("#endif",file=h) + + # Gen cpp + with open(os.path.join("allocation",test_name)+".cpp","w") as h: + print(HEADER,file=h) + for l in alloc_cpp: + print(l,file=h) + print("#endif",file=h) + +def process_bench(test_name,test,msg,subtest): + global DEBUG + lines = msg.splitlines() + test_name = args.p +"_" + args.c + "_" + test_name + if DEBUG: + print(os.path.join(results(),test_name)+".txt") + with open(os.path.join(results(),test_name)+".txt","w") as h: + for l in lines: + print(l.rstrip(),file=h) + + +def process_result(test_name,test,msg,subtest): + printSubTitle("Process result") + if args.a: + process_allocator_data(test_name,test,msg,subtest) + else: + process_bench(test_name,test,msg,subtest) + +def runVHT(test_name,test,err,subtest): + core = args.c + target = target_name() + config = os.path.join("fvp_configs",target) + ".txt" + #print(target) + #print(config) + if core == "M55": + exe = "cpu0=" + out_path() + else: + exe = out_path() + res=run([fvp[core],"-f",config,"-a",exe]) + if not is_error(res,test_name,err): + process_result(test_name,test,res.msg,subtest) + +def runMPS3(test_name,test,err,subtest): + lines="" + res = None + try: + exe = out_path() + lines = mps3run.run_out(exe,args.u) + res = Result(lines) + except Exception as e: + res = Result(str(e),error = True) + if not is_error(res,test_name,err): + process_result(test_name,test,res.msg,subtest) + +def runATest(test,file_err,nb,NB_MAX,current_nb_axf,nb_axf,first=True,subtest=None): + global DEBUG + if subtest is not None: + maxsub = SUBTESTS[test[0]] + 
test_name=f"{test[0]}_{test[1]}_{test[2]}_{subtest}" + printTitle(test_name + f" : AXF {current_nb_axf} / {nb_axf}, TEST {nb}/{NB_MAX} (subtest {subtest}/{maxsub})") + else: + test_name=f"{test[0]}_{test[1]}_{test[2]}" + printTitle(test_name + f" : AXF {current_nb_axf} / {nb_axf}, TEST {nb}/{NB_MAX}") + if args.d: + return + printSubTitle("Configure and build") + if configure_and_build_test(test_name,test,file_err,subtest,first): + printSubTitle("Run") + if args.p == "VHT": + runVHT(test_name,test,file_err,subtest) + if args.p == "MPS3" and args.c == "M55": + runMPS3(test_name,test,file_err,subtest) + +nb_axf = 0 +for test in all_tests: + if test[0] in SUBTESTS: + for subtestnbb in range(SUBTESTS[test[0]]): + if not args.b or not is_only_test(test,subtestnbb+1): + nb_axf = nb_axf + 1 + else: + nb_axf = nb_axf + 1 +print(f"Number of axf to test = {nb_axf}") + +with open(os.path.join(results(),"errors.txt"),"w") as err: + # Generate include for allocations + if args.a or args.i: + with open(os.path.join("allocation","all.h"),"w") as fh: + for test in all_tests: + if test[0] in SUBTESTS: + for subtestnbb in range(SUBTESTS[test[0]]): + test_name=f"{test[0]}_{test[1]}_{test[2]}_{subtestnbb+1}" + print(f"#include \"{test_name}.h\"",file=fh) + else: + test_name=f"{test[0]}_{test[1]}_{test[2]}" + print(f"#include \"{test_name}.h\"",file=fh) + + with open(os.path.join("allocation","all.cpp"),"w") as fc: + for test in all_tests: + if test[0] in SUBTESTS: + for subtestnbb in range(SUBTESTS[test[0]]): + test_name=f"{test[0]}_{test[1]}_{test[2]}_{subtestnbb+1}" + print(f"#include \"{test_name}.cpp\"",file=fc) + else: + test_name=f"{test[0]}_{test[1]}_{test[2]}" + print(f"#include \"{test_name}.cpp\"",file=fc) + + if not args.i: + NB_MAX = len(all_tests) + nb = 1 # test cases + current_axf = 1 + first = True + for test in all_tests: + if test[0] in SUBTESTS: + for subtestnbb in range(SUBTESTS[test[0]]): + if not args.b or not is_only_test(test,subtestnbb+1): + 
runATest(test,err,nb,NB_MAX,current_axf,nb_axf,first,subtestnbb+1) + current_axf = current_axf + 1 + first = False + else: + runATest(test,err,nb,NB_MAX,current_axf,nb_axf,first) + current_axf = current_axf + 1 + first = False + nb = nb + 1 + + +if ERROR_OCCURED: + printError("Error in tests:") + for n in all_errors: + printError(n) + sys.exit("Error occurred") +else: + sys.exit(0) diff --git a/dsppp/test.cbuild-pack.yml b/dsppp/test.cbuild-pack.yml new file mode 100644 index 00000000..0f3c7dcf --- /dev/null +++ b/dsppp/test.cbuild-pack.yml @@ -0,0 +1,17 @@ +cbuild-pack: + resolved-packs: + - resolved-pack: ARM::CMSIS@6.0.0 + selected-by: + - ARM::CMSIS@6.0.0 + - resolved-pack: ARM::CMSIS-Compiler@2.0.0 + selected-by: + - ARM::CMSIS-Compiler@2.0.0 + - resolved-pack: ARM::CMSIS-DSP@1.15.0 + selected-by: + - ARM::CMSIS-DSP@1.15.0 + - resolved-pack: ARM::Cortex_DFP@1.0.0 + selected-by: + - ARM::Cortex_DFP@1.0.0 + - resolved-pack: ARM::V2M_MPS3_SSE_300_BSP@1.4.0 + selected-by: + - ARM::V2M_MPS3_SSE_300_BSP@1.4.0 diff --git a/dsppp/test.cproject.yml b/dsppp/test.cproject.yml new file mode 100644 index 00000000..2538d62c --- /dev/null +++ b/dsppp/test.cproject.yml @@ -0,0 +1,135 @@ +project: + groups: + - group: Tests + files: + - file: tests/matrix_test.cpp + - file: tests/dot_test.cpp + - file: tests/vector_test.cpp + - file: tests/row_test.cpp + - file: tests/col_test.cpp + #- file: tests/filter_test.cpp + - file: tests/fusion_test.cpp + #- file: tests/debug_test.cpp + #- file: tests/debug_test_external.cpp + - file: tests/common_tests.cpp + - file: tests/bench.c + - file: tests/cmsisdsp.cpp + - file: clang_sse300.c + for-context: + - +MPS3-Corstone-300 + for-compiler: + - CLANG + - group: App + files: + - file: main.c + - file: allocator.cpp + add-path: + - Include + - ../../../boost_1_84_0 + - . 
+ - tests + + components: + - component: ARM::CMSIS:CORE + - component: ARM::CMSIS:DSP@1.15.0 + - component: ARM::Device:Startup&C Startup + for-context: + - +VHT-Corstone-300 + - +VHT-M0P + - +VHT-M4 + - +MPS3-Corstone-300 + - component: ARM::Device:Definition + for-context: + - +VHT-Corstone-300 + - +MPS3-Corstone-300 + - component: CMSIS-Compiler:CORE + for-context: + - +MPS3-Corstone-300 + - component: CMSIS-Compiler:STDOUT:Custom@1.0.0 + for-context: + - +MPS3-Corstone-300 + - component: ARM::Device:USART STDOUT + for-context: + - +MPS3-Corstone-300 + - component: ARM::CMSIS Driver:USART + for-context: + - +MPS3-Corstone-300 + - component: ARM::Device:Native Driver:SysCounter + for-context: + - +VHT-Corstone-300 + - +MPS3-Corstone-300 + - component: ARM::Device:Native Driver:SysTimer + for-context: + - +VHT-Corstone-300 + - +MPS3-Corstone-300 + - component: ARM::Device:Native Driver:Timeout + for-context: + - +VHT-Corstone-300 + - +MPS3-Corstone-300 + - component: ARM::Device:Native Driver:UART + for-context: + - +MPS3-Corstone-300 + + linker: + - script: linker_scripts/gcc_sse300_mps3.ld + for-context: + - +MPS3-Corstone-300 + - +VHT-Corstone-300 + for-compiler: GCC + + - script: linker_scripts/clang_sse300_mps3.sct + for-context: + - +MPS3-Corstone-300 + - +VHT-Corstone-300 + for-compiler: CLANG + + - script: linker_scripts/ac6_sse300_mps3_s.sct + for-context: + - +MPS3-Corstone-300 + - +VHT-Corstone-300 + for-compiler: AC6 + + - regions: linker_scripts/SSE-300-MPS3/region_defs.h + for-context: + - +MPS3-Corstone-300 + - +VHT-Corstone-300 + + - script: linker_scripts/gcc_m0p_mps3.ld + for-context: + - +VHT-M0P + for-compiler: GCC + + - script: linker_scripts/clang_m0p_mps3.ld + for-context: + - +VHT-M0P + for-compiler: CLANG + + - script: linker_scripts/ac6_m0p_mps3_s.sct + for-context: + - +VHT-M0P + for-compiler: AC6 + + - regions: linker_scripts/ARMCM0P/region_defs.h + for-context: + - +VHT-M0P + + - script: linker_scripts/gcc_m4_mps3.ld + for-context: + 
- +VHT-M4 + for-compiler: GCC + + - script: linker_scripts/clang_m4_mps3.ld + for-context: + - +VHT-M4 + for-compiler: CLANG + + - script: linker_scripts/ac6_m4_mps3_s.sct + for-context: + - +VHT-M4 + for-compiler: AC6 + + - regions: linker_scripts/ARMCM4/region_defs.h + for-context: + - +VHT-M4 + + diff --git a/dsppp/test.csolution.yml b/dsppp/test.csolution.yml new file mode 100644 index 00000000..028759a7 --- /dev/null +++ b/dsppp/test.csolution.yml @@ -0,0 +1,108 @@ +solution: + compiler: AC6@6.22.0 + + language-C: c11 + language-CPP: c++17 + cdefault: + + packs: + - pack: ARM::CMSIS@6.0.0 + - pack: ARM::CMSIS-DSP@1.15.0 + - pack: ARM::V2M_MPS3_SSE_300_BSP@1.4.0 + - pack: ARM::CMSIS-Compiler@2.0.0 + - pack: ARM::Cortex_DFP@1.0.0 + + target-types: + - type: MPS3-Corstone-300 + device: ARM::SSE-300-MPS3 + board: ARM::V2M-MPS3-SSE-300-FVP + define: + - CORTEXM + - SSE300MPS3 + - MPS3 + - ARMCM55 + misc: + - for-compiler: GCC + C: + - -Wno-sign-compare + - -Wno-unused-parameter + CPP: + - -Wno-sign-compare + - -Wno-unused-parameter + Link: + - --specs=nosys.specs + - for-compiler: CLANG + C: + - -Wno-sign-compare + - -Wno-unused-parameter + CPP: + - -Wno-sign-compare + - -Wno-unused-parameter + Link: + - -lcrt0 + + - type: VHT-Corstone-300 + device: ARM::SSE-300-MPS3 + board: ARM::V2M-MPS3-SSE-300-FVP + define: + - CORTEXM + - ARMCM55 + - VHT + misc: + - for-compiler: GCC + Link: + - --specs=rdimon.specs + Library: + - -lrdimon + - for-compiler: CLANG + Link: + - -lcrt0-semihost + - -lsemihost + + - type: VHT-M0P + device: ARMCM0P + #board: uVision Simulator + define: + - CORTEXM + - ARMCM0P + - DISABLEFLOAT16 + - VHT + misc: + - for-compiler: GCC + Link: + - --specs=rdimon.specs + Library: + - -lrdimon + - for-compiler: CLANG + Link: + - -lcrt0-semihost + - -lsemihost + + - type: VHT-M4 + device: ARMCM4 + #board: uVision Simulator + define: + - CORTEXM + - ARMCM4_FP + - DISABLEFLOAT16 + - VHT + misc: + - for-compiler: GCC + Link: + - --specs=rdimon.specs + 
Library: + - -lrdimon + - for-compiler: CLANG + Link: + - -lcrt0-semihost + - -lsemihost + + build-types: + - type: Release + debug: on + + + projects: + - project: ./test.cproject.yml + - project: ./example.cproject.yml + \ No newline at end of file diff --git a/dsppp/test_config.h b/dsppp/test_config.h new file mode 100644 index 00000000..9349fbb8 --- /dev/null +++ b/dsppp/test_config.h @@ -0,0 +1,13 @@ +#ifndef TEST_CONFIG_H +#define TEST_CONFIG_H + +#define POOL_ALLOCATOR +//#define ONLY_BENCHMARKS + +#define VECTOR_TEST +#define F32_DT +#define STATIC_TEST + + +#endif + diff --git a/dsppp/tests/bench.c b/dsppp/tests/bench.c new file mode 100644 index 00000000..d4055c84 --- /dev/null +++ b/dsppp/tests/bench.c @@ -0,0 +1,3 @@ +#include "bench.h" + +uint32_t start_time, stop_time, cycle_count; diff --git a/dsppp/tests/bench.h b/dsppp/tests/bench.h new file mode 100644 index 00000000..b045ddee --- /dev/null +++ b/dsppp/tests/bench.h @@ -0,0 +1,60 @@ +#if !defined(HOST) +#if !defined(NORTE) + #include "RTE_Components.h" + #include CMSIS_device_header +#endif +#endif + +#ifdef __cplusplus + +#include +#else +#include +#endif + +#ifdef __cplusplus + + +extern "C" +{ +#endif + +extern uint32_t start_time; +extern uint32_t stop_time; +extern uint32_t cycle_count; + +#if defined(HOST) +#define INIT_SYSTICK +#define START_CYCLE_MEASUREMENT +#define STOP_CYCLE_MEASUREMENT +#else +#define INIT_SYSTICK \ + SysTick->CTRL=0; \ + SysTick->LOAD=0xFFFFFFUL;\ + SysTick->VAL=0; \ + SysTick->CTRL=5; \ + while (SysTick->VAL==0)\ + ; + +#define START_CYCLE_MEASUREMENT \ + start_time= SysTick->VAL; + +#define STOP_CYCLE_MEASUREMENT \ + stop_time= SysTick->VAL; \ + SysTick->CTRL=0; \ + cycle_count = start_time - stop_time; \ + printf ("Cycle count = %d\r\n",(int)cycle_count); +#endif + +#if !defined(HOST) && (__ARM_ARCH > 6) +#define dbgInst(imm) __asm volatile("DBG %0\n\t" : :"Ir" ((imm)) ) +#define startSectionNB(num) dbgInst(((num) & 0x7) | 0x0) +#define stopSectionNB(num) 
dbgInst(((num) & 0x7) | 0x8) +#else +#define startSectionNB(num) +#define stopSectionNB(num) +#endif + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/dsppp/tests/cmsis_tests.h b/dsppp/tests/cmsis_tests.h new file mode 100644 index 00000000..ed5e4486 --- /dev/null +++ b/dsppp/tests/cmsis_tests.h @@ -0,0 +1,699 @@ +#pragma once + + +extern "C" { +#include "arm_math_types.h" +#include "arm_math_types_f16.h" +#include "dsp/filtering_functions.h" +#include "dsp/matrix_functions.h" +#include "dsp/matrix_functions_f16.h" + +} + +template +struct NameOfType; + +template +struct TailForTests; + +template<> +struct NameOfType +{ + constexpr static const char* v="float64_t"; + constexpr static const char* xls="f64"; +}; + +template<> +struct NameOfType +{ + constexpr static const char* v="float32_t"; + constexpr static const char* xls="f32"; + +}; + +#if !defined(DISABLEFLOAT16) +template<> +struct NameOfType +{ + constexpr static const char* v="float16_t"; + constexpr static const char* xls="f16"; + +}; +#endif + +template<> +struct NameOfType +{ + constexpr static const char* v="q31"; + constexpr static const char* xls="q31"; + +}; + +template<> +struct NameOfType +{ + constexpr static const char* v="q15"; + constexpr static const char* xls="q15"; + +}; + +template<> +struct NameOfType +{ + constexpr static const char* v="q7"; + constexpr static const char* xls="q7"; + +}; + +template<> +struct TailForTests +{ + constexpr static const int tail = 1; + constexpr static const int loop = 2; + +}; + +template<> +struct TailForTests +{ + constexpr static const int tail = 3; + constexpr static const int loop = 2*4; +}; + +#if !defined(DISABLEFLOAT16) +template<> +struct TailForTests +{ + constexpr static const int tail = 7; + constexpr static const int loop = 2*8; + +}; +#endif + +template<> +struct TailForTests +{ + constexpr static const int tail = 3; + constexpr static const int loop = 2*4; +}; + +template<> +struct TailForTests +{ + constexpr static 
const int tail = 7; + constexpr static const int loop = 2*8; +}; + +template<> +struct TailForTests +{ + constexpr static const int tail = 15; + constexpr static const int loop = 2*16; +}; + +#include "common_tests.h" + +#if !defined(DISABLEFLOAT16) +extern void cmsisdsp_add(const float16_t* a, + const float16_t* b, + float16_t* c, + uint32_t l); +#endif + +extern void cmsisdsp_add(const float64_t* a, + const float64_t* b, + float64_t* c, + uint32_t l); + +extern void cmsisdsp_add(const float32_t* a, + const float32_t* b, + float32_t* c, + uint32_t l); + +extern void cmsisdsp_add(const Q31* a, + const Q31* b, + Q31* c, + uint32_t l); + +extern void cmsisdsp_add(const Q15* a, + const Q15* b, + Q15* c, + uint32_t l); + +extern void cmsisdsp_add(const Q7* a, + const Q7* b, + Q7* c, + uint32_t l); + +extern void cmsisdsp_mat_add(const float32_t* a, + const float32_t* b, + float32_t* c, + uint32_t row,uint32_t col); + +#if !defined(DISABLEFLOAT16) +extern void cmsisdsp_mat_add(const float16_t* a, + const float16_t* b, + float16_t* c, + uint32_t row,uint32_t col); +#endif + +extern void cmsisdsp_mat_add(const Q31* a, + const Q31* b, + Q31* c, + uint32_t row,uint32_t col); + +extern void cmsisdsp_mat_add(const Q15* a, + const Q15* b, + Q15* c, + uint32_t row,uint32_t col); + +extern void cmsisdsp_mat_add(const Q7* a, + const Q7* b, + Q7* c, + uint32_t row,uint32_t col); + +#if !defined(DISABLEFLOAT16) +extern void cmsisdsp_dot(const float16_t* a, + const float16_t* b, + float16_t &c, + uint32_t l); +#endif + +extern void cmsisdsp_dot(const float64_t* a, + const float64_t* b, + float64_t &c, + uint32_t l); + +extern void cmsisdsp_dot(const float32_t* a, + const float32_t* b, + float32_t &c, + uint32_t l); + +extern void cmsisdsp_dot(const Q31* a, + const Q31* b, + Q<15,48> &c, + uint32_t l); + +extern void cmsisdsp_dot(const Q15* a, + const Q15* b, + Q<33,30> &c, + uint32_t l); + +extern void cmsisdsp_dot(const Q7* a, + const Q7* b, + Q<17,14> &c, + uint32_t l); + +extern 
void cmsisdsp_dot_expr(const double* a, + const double* b, + const double* c, + const double* d, + double* tmp1, + double* tmp2, + const double scale, + double &r, + uint32_t l); + +extern void cmsisdsp_dot_expr(const float32_t* a, + const float32_t* b, + const float32_t* c, + const float32_t* d, + float32_t* tmp1, + float32_t* tmp2, + const float32_t scale, + float32_t &r, + uint32_t l); + +#if !defined(DISABLEFLOAT16) +extern void cmsisdsp_dot_expr(const float16_t* a, + const float16_t* b, + const float16_t* c, + const float16_t* d, + float16_t* tmp1, + float16_t* tmp2, + const float16_t scale, + float16_t &r, + uint32_t l); +#endif + +extern void cmsisdsp_dot_expr(const Q7* a, + const Q7* b, + const Q7* c, + const Q7* d, + Q7* tmp1, + Q7* tmp2, + const Q7 scale, + Q<17,14> &r, + uint32_t l); + +extern void cmsisdsp_dot_expr(const Q15* a, + const Q15* b, + const Q15* c, + const Q15* d, + Q15* tmp1, + Q15* tmp2, + const Q15 scale, + Q<33,30> &r, + uint32_t l); + +extern void cmsisdsp_dot_expr(const Q31* a, + const Q31* b, + const Q31* c, + const Q31* d, + Q31* tmp1, + Q31* tmp2, + const Q31 scale, + Q<15,48> &r, + uint32_t l); + +extern void cmsisdsp_fir(const arm_fir_instance_f32 * S, + const float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize); + +extern void cmsisdsp_fir(const arm_fir_instance_q7 * S, + const Q7 * pSrc, + Q7 * pDst, + uint32_t blockSize); + +extern void cmsisdsp_fir(const arm_fir_instance_q15 * S, + const Q15 * pSrc, + Q15 * pDst, + uint32_t blockSize); + +extern void cmsisdsp_fir(const arm_fir_instance_q31 * S, + const Q31 * pSrc, + Q31 * pDst, + uint32_t blockSize); + +template +struct CMSISOuter { + static void run(const T *a, + const T *b, + T *res, + const uint32_t r,const uint32_t c) + { + DISABLE_LOOP_UNROLL + for(unsigned int row=0; row +struct CMSISOuter { + static void run(const float32_t *a, + const float32_t *b, + float32_t *res, + const uint32_t r,const uint32_t c) + { + _cmsis_outer(a,b,res,r,c); + } +}; + +#if 
!defined(DISABLEFLOAT16) +template<> +struct CMSISOuter { + static void run(const float16_t *a, + const float16_t *b, + float16_t *res, + const uint32_t r,const uint32_t c) + { + _cmsis_outer(a,b,res,r,c); + } +}; +#endif + +template<> +struct CMSISOuter { + static void run(const Q31 *a, + const Q31 *b, + Q31 *res, + const uint32_t r,const uint32_t c) + { + _cmsis_outer(a,b,res,r,c); + } +}; + +template<> +struct CMSISOuter { + static void run(const Q15 *a, + const Q15 *b, + Q15 *res, + const uint32_t r,const uint32_t c) + { + _cmsis_outer(a,b,res,r,c); + } +}; + +template<> +struct CMSISOuter { + static void run(const Q7 *a, + const Q7 *b, + Q7 *res, + const uint32_t r,const uint32_t c) + { + _cmsis_outer(a,b,res,r,c); + } +}; + +#endif + +extern void cmsis_init_householder(double *f,const int nb); +extern void cmsis_init_householder(float32_t *f,const int nb); + +#if !defined(DISABLEFLOAT16) +extern void cmsis_init_householder(float16_t *f,const int nb); +#endif + +extern void cmsis_init_qr(double *f,const int r,const int c); +extern void cmsis_init_qr(float32_t *f,const int r,const int c); + +#if !defined(DISABLEFLOAT16) +extern void cmsis_init_qr(float16_t *f,const int r,const int c); +#endif + +extern void cmsis_init_cholesky(double *f,const int r,const int c); +extern void cmsis_init_cholesky(float32_t *f,const int r,const int c); + +#if !defined(DISABLEFLOAT16) +extern void cmsis_init_cholesky(float16_t *f,const int r,const int c); +#endif + +extern void cmsis_mat_mult(const arm_matrix_instance_f64* a, + const arm_matrix_instance_f64* b, + arm_matrix_instance_f64 *c, + double *pState); + +extern void cmsis_mat_mult(const arm_matrix_instance_f32* a, + const arm_matrix_instance_f32* b, + arm_matrix_instance_f32 *c, + float32_t *pState); +#if !defined(DISABLEFLOAT16) +extern void cmsis_mat_mult(const arm_matrix_instance_f16* a, + const arm_matrix_instance_f16* b, + arm_matrix_instance_f16 *c, + float16_t *pState); +#endif + +extern void cmsis_mat_mult(const 
arm_matrix_instance_q7* a, + const arm_matrix_instance_q7* b, + arm_matrix_instance_q7 *c, + q7_t *pState); + +extern void cmsis_mat_mult(const arm_matrix_instance_q15* a, + const arm_matrix_instance_q15* b, + arm_matrix_instance_q15 *c, + q15_t *pState); + +extern void cmsis_mat_mult(const arm_matrix_instance_q31* a, + const arm_matrix_instance_q31* b, + arm_matrix_instance_q31 *c, + q31_t *pState); + +extern void cmsis_mat_trans(const arm_matrix_instance_q7* a, + arm_matrix_instance_q7* b); + +extern void cmsis_mat_trans(const arm_matrix_instance_q15* a, + arm_matrix_instance_q15* b); + +extern void cmsis_mat_trans(const arm_matrix_instance_q31* a, + arm_matrix_instance_q31* b); + +#if !defined(DISABLEFLOAT16) +extern void cmsis_mat_trans(const arm_matrix_instance_f16* a, + arm_matrix_instance_f16* b); +#endif + +extern void cmsis_mat_trans(const arm_matrix_instance_f64* a, + arm_matrix_instance_f64* b); + +extern void cmsis_mat_trans(const arm_matrix_instance_f32* a, + arm_matrix_instance_f32* b); + +extern double cmsis_householder(const double *,double* ,uint32_t); + +extern float32_t cmsis_householder(const float32_t *,float32_t* ,uint32_t); + +#if !defined(DISABLEFLOAT16) +extern float16_t cmsis_householder(const float16_t *,float16_t* ,uint32_t); +#endif + +extern void cmsis_mat_vec_mult( + const arm_matrix_instance_f64 *pSrcMat, + const double *pVec, + double *pDst); + +extern void cmsis_mat_vec_mult( + const arm_matrix_instance_f32 *pSrcMat, + const float32_t *pVec, + float32_t *pDst); + +#if !defined(DISABLEFLOAT16) +extern void cmsis_mat_vec_mult( + const arm_matrix_instance_f16 *pSrcMat, + const float16_t *pVec, + float16_t *pDst); +#endif + +extern void cmsis_mat_vec_mult( + const arm_matrix_instance_q31 *pSrcMat, + const Q31 *pVec, + Q31 *pDst); + +extern void cmsis_mat_vec_mult( + const arm_matrix_instance_q15 *pSrcMat, + const Q15 *pVec, + Q15 *pDst); + +extern void cmsis_mat_vec_mult( + const arm_matrix_instance_q7 *pSrcMat, + const Q7 *pVec, + Q7 
*pDst); + +extern arm_status cmsis_qr( + const arm_matrix_instance_f64 * pSrc, + const double threshold, + arm_matrix_instance_f64 * pOutR, + arm_matrix_instance_f64 * pOutQ, + double * pOutTau, + double *pTmpA, + double *pTmpB + ); + +extern arm_status cmsis_qr( + const arm_matrix_instance_f32 * pSrc, + const float32_t threshold, + arm_matrix_instance_f32 * pOutR, + arm_matrix_instance_f32 * pOutQ, + float32_t * pOutTau, + float32_t *pTmpA, + float32_t *pTmpB + ); + +#if !defined(DISABLEFLOAT16) +extern arm_status cmsis_qr( + const arm_matrix_instance_f16 * pSrc, + const float16_t threshold, + arm_matrix_instance_f16 * pOutR, + arm_matrix_instance_f16 * pOutQ, + float16_t * pOutTau, + float16_t *pTmpA, + float16_t *pTmpB + ); +#endif + +extern arm_status cmsis_cholesky( + const arm_matrix_instance_f64 * src, + arm_matrix_instance_f64 * dst); + +extern arm_status cmsis_cholesky( + const arm_matrix_instance_f32 * src, + arm_matrix_instance_f32 * dst); + +#if !defined(DISABLEFLOAT16) +extern arm_status cmsis_cholesky( + const arm_matrix_instance_f16 * src, + arm_matrix_instance_f16 * dst); +#endif + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_f64 * src, + const double * a, + const double * b, + const double scalar, + double * tmp, + double * dst); + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_f32 * src, + const float32_t * a, + const float32_t * b, + const float32_t scalar, + float32_t * tmp, + float32_t * dst); + +#if !defined(DISABLEFLOAT16) +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_f16 * src, + const float16_t * a, + const float16_t * b, + const float16_t scalar, + float16_t * tmp, + float16_t * dst); +#endif + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_q31 * src, + const Q31 * a, + const Q31 * b, + const Q31 scalar, + Q31 * tmp, + Q31 * dst); + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_q15 * src, + const Q15 * a, + const Q15 * b, + const Q15 scalar, + Q15 * 
tmp, + Q15 * dst); + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_q7 * src, + const Q7 * a, + const Q7 * b, + const Q7 scalar, + Q7 * tmp, + Q7 * dst); + +template +struct CMSISMatrixType; + +template<> +struct CMSISMatrixType +{ + typedef arm_matrix_instance_f64 type; + typedef double scalar; +}; + +template<> +struct CMSISMatrixType +{ + typedef arm_matrix_instance_f32 type; + typedef float32_t scalar; +}; + +#if !defined(DISABLEFLOAT16) +template<> +struct CMSISMatrixType +{ + typedef arm_matrix_instance_f16 type; + typedef float16_t scalar; +}; +#endif + +template<> +struct CMSISMatrixType +{ + typedef arm_matrix_instance_q7 type; + typedef q7_t scalar; + +}; + +template<> +struct CMSISMatrixType +{ + typedef arm_matrix_instance_q15 type; + typedef q15_t scalar; + +}; + +template<> +struct CMSISMatrixType +{ + typedef arm_matrix_instance_q31 type; + typedef q31_t scalar; + +}; + +template +struct TestConstant; + +template<> +struct TestConstant +{ + constexpr static double v = 0.2; + constexpr static double small = 0.001; +}; + +template<> +struct TestConstant +{ + constexpr static float v = 0.2f; + constexpr static float small = 0.001f; +}; + +#if !defined(DISABLEFLOAT16) +template<> +struct TestConstant +{ + constexpr static float16_t v = 0.2f; + constexpr static float16_t small = 0.001f; + +}; +#endif + +template<> +struct TestConstant +{ + constexpr static Q7 v = 0.2_q7; + constexpr static Q7 small = 0.001_q7; +}; + + +template<> +struct TestConstant +{ + constexpr static Q15 v = 0.2_q15; + constexpr static Q15 small = 0.001_q15; +}; + +template<> +struct TestConstant +{ + constexpr static Q31 v = 0.2_q31; + constexpr static Q31 small = 0.001_q31; +}; \ No newline at end of file diff --git a/dsppp/tests/cmsisdsp.cpp b/dsppp/tests/cmsisdsp.cpp new file mode 100644 index 00000000..7c6ad0c6 --- /dev/null +++ b/dsppp/tests/cmsisdsp.cpp @@ -0,0 +1,1146 @@ +#include "allocator.h" + +#include +#include +#include + +using namespace arm_cmsis_dsp; 
+ + +#include "dsp/basic_math_functions.h" +#include "dsp/basic_math_functions_f16.h" +#include "dsp/filtering_functions.h" +#include "dsp/matrix_functions.h" +#include "dsp/matrix_functions_f16.h" + + +#include "bench.h" + +#if !defined(DISABLEFLOAT16) +void cmsisdsp_add(const float16_t* a, + const float16_t* b, + float16_t* c, + uint32_t l) +{ + + arm_add_f16(a,b,c,l); +}; +#endif + + +void cmsisdsp_add(const float64_t* a, + const float64_t* b, + float64_t* c, + uint32_t l) +{ + + arm_add_f64(a,b,c,l); +}; + + +void cmsisdsp_add(const float32_t* a, + const float32_t* b, + float32_t* c, + uint32_t l) +{ + arm_add_f32(a,b,c,l); +}; + + + + +void cmsisdsp_add(const Q31* a, + const Q31* b, + Q31* c, + uint32_t l) +{ + + arm_add_q31(reinterpret_cast(a), + reinterpret_cast(b), + reinterpret_cast(c),l); +}; + + +void cmsisdsp_add(const Q15* a, + const Q15* b, + Q15* c, + uint32_t l) +{ + + arm_add_q15(reinterpret_cast(a), + reinterpret_cast(b), + reinterpret_cast(c),l); +}; + +void cmsisdsp_add(const Q7* a, + const Q7* b, + Q7* c, + uint32_t l) +{ + + arm_add_q7(reinterpret_cast(a), + reinterpret_cast(b), + reinterpret_cast(c),l); +}; + +#if !defined(DISABLEFLOAT16) +void cmsisdsp_dot(const float16_t* a, + const float16_t* b, + float16_t &c, + uint32_t l) +{ + arm_dot_prod_f16(a,b,l,&c); +}; +#endif + + +void cmsisdsp_dot(const float64_t* a, + const float64_t* b, + float64_t &c, + uint32_t l) +{ + arm_dot_prod_f64(a,b,l,&c); +}; + +void cmsisdsp_dot(const float32_t* a, + const float32_t* b, + float32_t &c, + uint32_t l) +{ + arm_dot_prod_f32(a,b,l,&c); +}; + + + + +void cmsisdsp_dot(const Q31* a, + const Q31* b, + Q<15,48> &c, + uint32_t l) +{ + arm_dot_prod_q31(reinterpret_cast(a), + reinterpret_cast(b),l, + reinterpret_cast(&c)); +}; + + +void cmsisdsp_dot(const Q15* a, + const Q15* b, + Q<33,30> &c, + uint32_t l) +{ + arm_dot_prod_q15(reinterpret_cast(a), + reinterpret_cast(b),l, + reinterpret_cast(&c)); +}; + + +void cmsisdsp_dot(const Q7* a, + const Q7* b, + 
Q<17,14> &c, + uint32_t l) +{ + arm_dot_prod_q7(reinterpret_cast(a), + reinterpret_cast(b),l, + reinterpret_cast(&c)); +}; + +void cmsisdsp_dot_expr(const double* a, + const double* b, + const double* c, + const double* d, + double* tmp1, + double* tmp2, + const double scale, + double &r, + uint32_t l) +{ + arm_add_f64(a,b,tmp1,l); + arm_scale_f64(tmp1,scale,tmp1,l); + arm_mult_f64(c,d,tmp2,l); + arm_dot_prod_f64(tmp1,tmp2,l,&r); +}; + +void cmsisdsp_dot_expr(const float32_t* a, + const float32_t* b, + const float32_t* c, + const float32_t* d, + float32_t* tmp1, + float32_t* tmp2, + const float32_t scale, + float32_t &r, + uint32_t l) +{ + arm_add_f32(a,b,tmp1,l); + arm_scale_f32(tmp1,scale,tmp1,l); + arm_mult_f32(c,d,tmp2,l); + arm_dot_prod_f32(tmp1,tmp2,l,&r); +}; + +#if !defined(DISABLEFLOAT16) +void cmsisdsp_dot_expr(const float16_t* a, + const float16_t* b, + const float16_t* c, + const float16_t* d, + float16_t* tmp1, + float16_t* tmp2, + const float16_t scale, + float16_t &r, + uint32_t l) +{ + arm_add_f16(a,b,tmp1,l); + arm_scale_f16(tmp1,scale,tmp1,l); + arm_mult_f16(c,d,tmp2,l); + arm_dot_prod_f16(tmp1,tmp2,l,&r); +}; +#endif + +void cmsisdsp_fir(const arm_fir_instance_f32 * S, + const float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize) +{ + arm_fir_f32(S,pSrc,pDst,blockSize); +}; + +void cmsisdsp_fir(const arm_fir_instance_q7 * S, + const Q7 * pSrc, + Q7 * pDst, + uint32_t blockSize) +{ + arm_fir_q7(S,reinterpret_cast(pSrc), + reinterpret_cast(pDst),blockSize); +}; + +void cmsisdsp_fir(const arm_fir_instance_q15 * S, + const Q15 * pSrc, + Q15 * pDst, + uint32_t blockSize) +{ + arm_fir_q15(S,reinterpret_cast(pSrc), + reinterpret_cast(pDst),blockSize); +}; + +void cmsisdsp_fir(const arm_fir_instance_q31 * S, + const Q31 * pSrc, + Q31 * pDst, + uint32_t blockSize) +{ + arm_fir_q31(S,reinterpret_cast(pSrc), + reinterpret_cast(pDst),blockSize); +}; + + +void cmsisdsp_dot_expr(const Q7* a, + const Q7* b, + const Q7* c, + const Q7* d, + Q7* tmp1, + Q7* 
tmp2, + const Q7 scale, + Q<17,14> &r, + uint32_t l) +{ + arm_add_q7(reinterpret_cast(a), + reinterpret_cast(b), + reinterpret_cast(tmp1),l); + arm_scale_q7(reinterpret_cast(tmp1),scale.v,0, + reinterpret_cast(tmp1),l); + + + arm_mult_q7(reinterpret_cast(c), + reinterpret_cast(d), + reinterpret_cast(tmp2),l); + + + arm_dot_prod_q7(reinterpret_cast(tmp1), + reinterpret_cast(tmp2),l,&r.v); +}; + +void cmsisdsp_dot_expr(const Q15* a, + const Q15* b, + const Q15* c, + const Q15* d, + Q15* tmp1, + Q15* tmp2, + const Q15 scale, + Q<33,30> &r, + uint32_t l) +{ + arm_add_q15(reinterpret_cast(a), + reinterpret_cast(b), + reinterpret_cast(tmp1),l); + arm_scale_q15(reinterpret_cast(tmp1),scale.v,0, + reinterpret_cast(tmp1),l); + arm_mult_q15(reinterpret_cast(c), + reinterpret_cast(d), + reinterpret_cast(tmp2),l); + arm_dot_prod_q15(reinterpret_cast(tmp1), + reinterpret_cast(tmp2),l,&r.v); +}; + +void cmsisdsp_dot_expr(const Q31* a, + const Q31* b, + const Q31* c, + const Q31* d, + Q31* tmp1, + Q31* tmp2, + const Q31 scale, + Q<15,48> &r, + uint32_t l) +{ + arm_add_q31(reinterpret_cast(a), + reinterpret_cast(b), + reinterpret_cast(tmp1),l); + arm_scale_q31(reinterpret_cast(tmp1),scale.v,0, + reinterpret_cast(tmp1),l); + arm_mult_q31(reinterpret_cast(c), + reinterpret_cast(d), + reinterpret_cast(tmp2),l); + arm_dot_prod_q31(reinterpret_cast(tmp1), + reinterpret_cast(tmp2),l,&r.v); +}; + + +void cmsisdsp_mat_add(const float32_t* a, + const float32_t* b, + float32_t* c, + uint32_t row,uint32_t col) +{ + arm_matrix_instance_f32 srca; + arm_matrix_instance_f32 srcb; + + arm_matrix_instance_f32 dst; + + + srca.numRows = row; + srca.numCols = col; + srca.pData = (float32_t*)a; + + srcb.numRows = row; + srcb.numCols = col; + srcb.pData = (float32_t*)b; + + dst.numRows = row; + dst.numCols = col; + dst.pData = c; + arm_mat_add_f32(&srca,&srcb,&dst); + +} + +#if !defined(DISABLEFLOAT16) +void cmsisdsp_mat_add(const float16_t* a, + const float16_t* b, + float16_t* c, + uint32_t 
row,uint32_t col) +{ + arm_matrix_instance_f16 srca; + arm_matrix_instance_f16 srcb; + + arm_matrix_instance_f16 dst; + + + srca.numRows = row; + srca.numCols = col; + srca.pData = (float16_t*)a; + + srcb.numRows = row; + srcb.numCols = col; + srcb.pData = (float16_t*)b; + + dst.numRows = row; + dst.numCols = col; + dst.pData = c; + arm_mat_add_f16(&srca,&srcb,&dst); + +} +#endif + +void cmsisdsp_mat_add(const Q31* a, + const Q31* b, + Q31* c, + uint32_t row,uint32_t col) +{ + arm_matrix_instance_q31 srca; + arm_matrix_instance_q31 srcb; + + arm_matrix_instance_q31 dst; + + + srca.numRows = row; + srca.numCols = col; + srca.pData = reinterpret_cast(const_cast(a)); + + srcb.numRows = row; + srcb.numCols = col; + srcb.pData = reinterpret_cast(const_cast(b)); + + dst.numRows = row; + dst.numCols = col; + dst.pData = reinterpret_cast(c); + arm_mat_add_q31(&srca,&srcb,&dst); + +} + +void cmsisdsp_mat_add(const Q15* a, + const Q15* b, + Q15* c, + uint32_t row,uint32_t col) +{ + arm_matrix_instance_q15 srca; + arm_matrix_instance_q15 srcb; + + arm_matrix_instance_q15 dst; + + + srca.numRows = row; + srca.numCols = col; + srca.pData = reinterpret_cast(const_cast(a)); + + srcb.numRows = row; + srcb.numCols = col; + srcb.pData = reinterpret_cast(const_cast(b)); + + dst.numRows = row; + dst.numCols = col; + dst.pData = reinterpret_cast(c); + arm_mat_add_q15(&srca,&srcb,&dst); + +} + + +void cmsisdsp_mat_add(const Q7* a, + const Q7* b, + Q7* c, + uint32_t row,uint32_t col) +{ + (void)a; + (void)b; + (void)c; + (void)row; + (void)col; + // Doing nothing since there is no equivalent CMSIS-DSP + // function + // Required to enable the build + + /* + arm_matrix_instance_q7 srca; + arm_matrix_instance_q7 srcb; + + arm_matrix_instance_q7 dst; + + + srca.numRows = row; + srca.numCols = col; + srca.pData = reinterpret_cast(const_cast(a)); + + srcb.numRows = row; + srcb.numCols = col; + srcb.pData = reinterpret_cast(const_cast(b)); + + dst.numRows = row; + dst.numCols = col; + 
dst.pData = reinterpret_cast(c); + arm_mat_add_q7(&srca,&srcb,&dst); +*/ +} + +#if defined(ARM_MATH_MVEI) || defined(ARM_MATH_MVEF) +void _cmsis_outer(const float32_t *a, + const float32_t *b, + float32_t *res, + const uint32_t r,const uint32_t c) +{ + for(unsigned int row=0; row(a); + const q31_t *pb = reinterpret_cast(b); + q31_t *pr = reinterpret_cast(res); + for(unsigned int row=0; row(a); + const q15_t *pb = reinterpret_cast(b); + q15_t *pr = reinterpret_cast(res); + for(unsigned int row=0; row(a); + const q7_t *pb = reinterpret_cast(b); + q7_t *pr = reinterpret_cast(res); + for(unsigned int row=0; row(pVec), + reinterpret_cast(pDst)); +} + +void cmsis_mat_vec_mult( + const arm_matrix_instance_q15 *pSrcMat, + const Q15 *pVec, + Q15 *pDst) +{ +arm_mat_vec_mult_q15(pSrcMat, + reinterpret_cast(pVec), + reinterpret_cast(pDst)); +} + +void cmsis_mat_vec_mult( + const arm_matrix_instance_q7 *pSrcMat, + const Q7 *pVec, + Q7 *pDst) +{ +arm_mat_vec_mult_q7(pSrcMat, + reinterpret_cast(pVec), + reinterpret_cast(pDst)); +} + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_f32 * src, + const float32_t * a, + const float32_t * b, + const float32_t scalar, + float32_t * tmp, + float32_t * dst) +{ + arm_scale_f32(b,scalar,tmp,src->numCols); + arm_add_f32(a,tmp,tmp,src->numCols); + arm_mat_vec_mult_f32(src, tmp, dst); +} + +#if !defined(DISABLEFLOAT16) +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_f16 * src, + const float16_t * a, + const float16_t * b, + const float16_t scalar, + float16_t * tmp, + float16_t * dst) +{ + arm_scale_f16(b,scalar,tmp,src->numCols); + arm_add_f16(a,tmp,tmp,src->numCols); + arm_mat_vec_mult_f16(src, tmp, dst); +} +#endif + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_q31 * src, + const Q31 * a, + const Q31 * b, + const Q31 scalar, + Q31 * tmp, + Q31 * dst) +{ + arm_scale_q31(reinterpret_cast(b), + scalar.v,0, + reinterpret_cast(tmp),src->numCols); + arm_add_q31(reinterpret_cast(a), + 
reinterpret_cast(tmp), + reinterpret_cast(tmp),src->numCols); + arm_mat_vec_mult_q31(src, + reinterpret_cast(tmp), + reinterpret_cast(dst)); +} + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_q15 * src, + const Q15 * a, + const Q15 * b, + const Q15 scalar, + Q15 * tmp, + Q15 * dst) +{ + arm_scale_q15(reinterpret_cast(b), + scalar.v,0, + reinterpret_cast(tmp),src->numCols); + arm_add_q15(reinterpret_cast(a), + reinterpret_cast(tmp), + reinterpret_cast(tmp),src->numCols); + arm_mat_vec_mult_q15(src, + reinterpret_cast(tmp), + reinterpret_cast(dst)); +} + +extern void cmsis_complex_mat_vec( + const arm_matrix_instance_q7 * src, + const Q7 * a, + const Q7 * b, + const Q7 scalar, + Q7 * tmp, + Q7 * dst) +{ + arm_scale_q7(reinterpret_cast(b), + scalar.v,0, + reinterpret_cast(tmp),src->numCols); + arm_add_q7(reinterpret_cast(a), + reinterpret_cast(tmp), + reinterpret_cast(tmp),src->numCols); + arm_mat_vec_mult_q7(src, + reinterpret_cast(tmp), + reinterpret_cast(dst)); +} \ No newline at end of file diff --git a/dsppp/tests/col_test.cpp b/dsppp/tests/col_test.cpp new file mode 100644 index 00000000..f7c2bde4 --- /dev/null +++ b/dsppp/tests/col_test.cpp @@ -0,0 +1,112 @@ +extern "C" { + extern void col_test(); +} + +#include "allocator.h" + +#include +#include +#include + +#include + +#include + + + +#include "dsp/matrix_functions.h" +#include "matrix_utils.h" + +template +static void test() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + + #if defined(STATIC_TEST) + PMat a; + PVector ref; + #else + PMat a(R,C); + PVector ref(R); + #endif + + init_array(a,R*C); + + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + #if defined(STATIC_TEST) + PVector res = copy(a.col(4)); + #else + PVector res = copy(a.col(4)); + #endif + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + for(int i=0;i +void all_col_test() +{ + const int nb_tails = TailForTests::tail; + const int nb_loops = 
TailForTests::loop; + + title("Col test"); + + test(); + test(); + test(); + test(); + + test(); + test(); + test(); + test(); + test(); + +} + +void col_test() +{ +#if defined(COL_TEST) + #if defined(F64_DT) + all_col_test(); + #endif + #if defined(F32_DT) + all_col_test(); + #endif + #if defined(F16_DT) && !defined(DISABLEFLOAT16) + all_col_test(); + #endif + #if defined(Q31_DT) + all_col_test(); + #endif + #if defined(Q15_DT) + all_col_test(); + #endif + #if defined(Q7_DT) + all_col_test(); + #endif +#endif +} \ No newline at end of file diff --git a/dsppp/tests/common_tests.cpp b/dsppp/tests/common_tests.cpp new file mode 100644 index 00000000..999eb3cf --- /dev/null +++ b/dsppp/tests/common_tests.cpp @@ -0,0 +1,48 @@ +#include "allocator.h" +#include +#include +#include + +#include "cmsis_tests.h" + +extern "C" { + void memory_pool_stats(); +} + +#if 0 +template<> +void init_array(Vector_Base &pDst,std::size_t nb) +{ + for(std::size_t i=0;i +bool validate(const float32_t* a, + const float32_t* b, + std::size_t nb, + float abser, + float reler) +{ + for(std::size_t i=0;i +#include "allocator.h" + +using namespace arm_cmsis_dsp; + +#define REL_ERROR (1.0e-6) +#define ABS_ERROR (1.0e-6) +#define ERROR(A,B,AE,RE) ((fabs((A) - (B)) > (AE + RE * fabs((B))))) +#define ERRVAL(VAL,REF,AE,RE) \ + std::cout << "Error = " << fabs(VAL-REF) << "\r\n"; \ + std::cout << "compared to " << (AE + RE * abs((REF))) << "\r\n"; + +/************ + * + * Data types + * + */ + + +#if defined(POOL_ALLOCATOR) + + template + using PVector = Vector; + + template + using PMat = Matrix; + +#else + + template + using PVector = Vector; + + template + using PMat = Matrix; + +#endif + +template +using PView = VectorView; + +template typename A> +void init_array(Vector &pDst,std::size_t nb) +{ + for(std::size_t i=0;i typename A> +void init_array(Vector &pDst,std::size_t nb) +{ + for(std::size_t i=0;i +void init_array(Vector_Base &pDst,std::size_t nb) +{ + for(std::size_t i=0;i typename A> +//void 
init_array(Vector &pDst,std::size_t nb); + + +//extern template void init_array<>(Vector_Base &pDst,std::size_t nb); + + +template::value,bool>::type = true> +bool validate(const T a, const T b, std::size_t nb,float abser = ABS_ERROR, float reler = REL_ERROR) +{ + for(std::size_t i=0;i>>::is_float) + { + if (ERROR(a[i],b[i],abser,reler) ) + { + std::cout << "Error at:" << i << " ; res=" << a[i] << " ; ref=" << b[i] << "\r\n"; + ERRVAL(a[i],b[i],abser,reler); + return(false); + } + } + else + { + if (a[i]!=b[i]) + { + std::cout << "Error at:" << i << " ; res=" << a[i] << " ; ref=" << b[i] << "\r\n"; + return(false); + } + } + } + return(true); +} + +template::value && + !HasMatrixIndexing::value && + IsVector::value && + !HasMatrixIndexing::value,bool>::type = true> +bool validate(const TA &a, const TB &b,float abser = ABS_ERROR, float reler = REL_ERROR) +{ + for(index_t i=0;i::type>::is_float) + { + if (ERROR(a[i],b[i],abser,reler) ) + { + std::cout << "Error at:" << i << " ; res=" << a[i] << " ; ref=" << b[i] << "\r\n"; + ERRVAL(a[i],b[i],abser,reler); + return(false); + } + } + else + { + if (a[i]!=b[i]) + { + std::cout << "Error at:" << i << " ; res=" << a[i] << " ; ref=" << b[i] << "\r\n"; + return(false); + } + } + } + return(true); +} + + +template::value + && !IsVector::value && !HasMatrixIndexing::value,bool>::type = true> +bool validate(const T a, const T b,float abser = ABS_ERROR, float reler = REL_ERROR) +{ + + if constexpr (number_traits>::is_float) + { + if (ERROR(a,b,abser,reler)) + { + std::cout << "Error: res=" << a << " ; ref=" << b << "\r\n"; + ERRVAL(a,b,abser,reler); + return(false); + } + } + else + { + if (a != b ) + { + std::cout << "Error : res=" << a << " ; ref=" << b << "\r\n"; + return(false); + } + } + + return(true); +} + +template::value && + HasMatrixIndexing::value && + number_traits::type>::is_float,bool>::type = true> +bool validate(const MA& a, const MB& b,float abser = ABS_ERROR, float reler = REL_ERROR) +{ + for(index_t 
row=0;row < a.rows() ; row++) + { + for(index_t col=0;col < a.columns() ; col++) + { + if (ERROR(a(row,col),b(row,col),abser,reler) ) + { + //std::cout << fabs(a(row,col)-b(row,col)) << "\r\n"; + //std::cout << REL_ERROR*fabsf(a(row,col)) << "\r\n"; + //std::cout << a(row,col) << "\r\n"; + //std::cout << b(row,col) << "\r\n"; + + std::cout << "Error at : (" << row << "," << col << ") ; res=" << a(row,col) << " ; ref=" << b(row,col) << "\r\n"; + ERRVAL(a(row,col),b(row,col),abser,reler); + return(false); + } + } + } + return(true); +} + +template::value && + HasMatrixIndexing::value && + number_traits::type>::is_float,bool>::type = true> +bool validateLT(const MA& a, const MB& b,float abser = ABS_ERROR, float reler = REL_ERROR) +{ + for(index_t row=0;row < a.rows() ; row++) + { + for(index_t col=0;col <= row ; col++) + { + if (ERROR(a(row,col),b(row,col),abser,reler) ) + { + //std::cout << fabs(a(row,col)-b(row,col)) << "\r\n"; + //std::cout << REL_ERROR*fabsf(a(row,col)) << "\r\n"; + //std::cout << a(row,col) << "\r\n"; + //std::cout << b(row,col) << "\r\n"; + + std::cout << "Error at : (" << row << "," << col << ") ; res=" << a(row,col) << " ; ref=" << b(row,col) << "\r\n"; + ERRVAL(a(row,col),b(row,col),abser,reler); + return(false); + } + } + } + return(true); +} + +template::value && + HasMatrixIndexing::value && + number_traits::type>::is_fixed,bool>::type = true> +bool validate(const MA& a, const MB& b,float abser = ABS_ERROR, float reler = REL_ERROR) +{ + (void)abser; + (void)reler; + for(index_t row=0;row < a.rows() ; row++) + { + for(index_t col=0;col < a.columns() ; col++) + { + if (a(row,col).v != b(row,col).v) + { + std::cout << "Error at : (" << row << "," << col << ") ; res=" << a(row,col) << " ; ref=" << b(row,col) << "\r\n"; + std::cout << "Error = " << abs(a(row,col).v - b(row,col).v) << "\r\n"; + return(false); + } + } + } + return(true); +} + +template<> +bool validate(const float32_t* a, const float32_t* b, std::size_t nb,float abser , float 
reler ); + + +extern template +bool validate<>(const float32_t* a, const float32_t* b, std::size_t nb,float abser , float reler ); + + + + + +template +void title(const std::string &s) +{ +#if !defined(SERIAL_DUMP) +#if defined(STATIC_TEST) + std::cout<<"\r\n\033[31;1;4m" << s << " " << NameOfType::xls << "\033[0m\r\n"; +#else + std::cout<<"\r\n\033[31;1;4m" << s << " dynamic " << NameOfType::xls << "\033[0m\r\n"; +#endif +#else +#if defined(STATIC_TEST) + std::cout << "\r\n" << s << " " << NameOfType::xls << "\r\n"; +#else + std::cout << "\r\n" << s << " dynamic " << NameOfType::xls << "\r\n"; +#endif +#endif +}; \ No newline at end of file diff --git a/dsppp/tests/debug_mat.h b/dsppp/tests/debug_mat.h new file mode 100644 index 00000000..a0d7c707 --- /dev/null +++ b/dsppp/tests/debug_mat.h @@ -0,0 +1,738 @@ +void pmat(float32_t *p,int nbrows,int nbcols) +{ + for(int r=0;rnumRows < pSrc->numCols) + { + return(ARM_MATH_SIZE_MISMATCH); + } + + memcpy(pOutR->pData,pSrc->pData,pSrc->numCols * pSrc->numRows*sizeof(float32_t)); + pOutR->numCols = pSrc->numCols; + pOutR->numRows = pSrc->numRows; + + p = pOutR->pData; + + pc = pOutTau; + for(col=0 ; col < pSrc->numCols; col++) + { + int32_t j,k,blkCnt,blkCnt2; + float32_t *pa0,*pa1,*pa2,*pa3,*ptemp; + float32_t temp; + float32x4_t v1,v2,vtemp; + + + COPY_COL_F32(pOutR,col,col,pTmpA); + + beta = arm_householder_f32(pTmpA,threshold,pSrc->numRows - col,pTmpA); + *pc++ = beta; + + //pvec(pTmpA,pSrc->numRows-col); + //pmat(p,pSrc->numRows-col,pSrc->numCols-col); + + pdst = pTmpB; + + /* v.T A(col:,col:) -> tmpb */ + pv = pTmpA; + pa = p; + + temp = *pv; + blkCnt = (pSrc->numCols-col) >> 2; + while (blkCnt > 0) + { + v1 = vld1q_f32(pa); + v2 = vmulq_n_f32(v1,temp); + vst1q_f32(pdst,v2); + + pa += 4; + pdst += 4; + blkCnt--; + } + blkCnt = (pSrc->numCols-col) & 3; + if (blkCnt > 0) + { + mve_pred16_t p0 = vctp32q(blkCnt); + v1 = vld1q_f32(pa); + v2 = vmulq_n_f32(v1,temp); + vst1q_p_f32(pdst,v2,p0); + + pa += blkCnt; + } + + + + 
pa += col; + pv++; + pdst = pTmpB; + + pa0 = pa; + pa1 = pa0 + pSrc->numCols; + pa2 = pa1 + pSrc->numCols; + pa3 = pa2 + pSrc->numCols; + + /* Unrolled loop */ + blkCnt = (pSrc->numRows-col - 1) >> 2; + k=1; + while(blkCnt > 0) + { + vtemp=vld1q_f32(pv); + + blkCnt2 = (pSrc->numCols-col) >> 2; + while (blkCnt2 > 0) + { + v1 = vld1q_f32(pdst); + + v2 = vld1q_f32(pa0); + v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,0)); + + v2 = vld1q_f32(pa1); + v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,1)); + + v2 = vld1q_f32(pa2); + v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,2)); + + v2 = vld1q_f32(pa3); + v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,3)); + + vst1q_f32(pdst,v1); + + pdst += 4; + pa0 += 4; + pa1 += 4; + pa2 += 4; + pa3 += 4; + blkCnt2--; + } + blkCnt2 = (pSrc->numCols-col) & 3; + if (blkCnt2 > 0) + { + mve_pred16_t p0 = vctp32q(blkCnt2); + + v1 = vld1q_f32(pdst); + + v2 = vld1q_f32(pa0); + v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,0)); + + v2 = vld1q_f32(pa1); + v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,1)); + + v2 = vld1q_f32(pa2); + v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,2)); + + v2 = vld1q_f32(pa3); + v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,3)); + + vst1q_p_f32(pdst,v1,p0); + + pa0 += blkCnt2; + pa1 += blkCnt2; + pa2 += blkCnt2; + pa3 += blkCnt2; + } + + pa0 += col + 3*pSrc->numCols; + pa1 += col + 3*pSrc->numCols; + pa2 += col + 3*pSrc->numCols; + pa3 += col + 3*pSrc->numCols; + pv += 4; + pdst = pTmpB; + k += 4; + blkCnt--; + } + + pa = pa0; + for(;knumRows-col; k++) + { + temp = *pv; + blkCnt2 = (pSrc->numCols-col) >> 2; + while (blkCnt2 > 0) + { + v1 = vld1q_f32(pa); + v2 = vld1q_f32(pdst); + v2 = vfmaq_n_f32(v2,v1,temp); + vst1q_f32(pdst,v2); + + pa += 4; + pdst += 4; + blkCnt2--; + } + blkCnt2 = (pSrc->numCols-col) & 3; + if (blkCnt2 > 0) + { + mve_pred16_t p0 = vctp32q(blkCnt2); + v1 = vld1q_f32(pa); + v2 = vld1q_f32(pdst); + v2 = vfmaq_n_f32(v2,v1,temp); + vst1q_p_f32(pdst,v2,p0); + + pa += blkCnt2; + } + + pa += col; + pv++; + pdst = pTmpB; + } + + 
//pvec(pTmpB,pSrc->numCols-col); + //printf("--\r\n"); + + /* A(col:,col:) - beta v tmpb */ + pa = p; + for(j=0;jnumRows-col; j++) + { + float32_t f = -beta * pTmpA[j]; + ptemp = pTmpB; + + blkCnt2 = (pSrc->numCols-col) >> 2; + while (blkCnt2 > 0) + { + v1 = vld1q_f32(pa); + v2 = vld1q_f32(ptemp); + v1 = vfmaq_n_f32(v1,v2,f); + vst1q_f32(pa,v1); + + pa += 4; + ptemp += 4; + + blkCnt2--; + } + blkCnt2 = (pSrc->numCols-col) & 3; + if (blkCnt2 > 0) + { + mve_pred16_t p0 = vctp32q(blkCnt2); + + v1 = vld1q_f32(pa); + v2 = vld1q_f32(ptemp); + v1 = vfmaq_n_f32(v1,v2,f); + vst1q_p_f32(pa,v1,p0); + + pa += blkCnt2; + } + + pa += col; + } + + /* Copy Householder reflectors into R matrix */ + pa = p + pOutR->numCols; + for(k=0;knumRows-col-1; k++) + { + *pa = pTmpA[k+1]; + pa += pOutR->numCols; + } + + p += 1 + pOutR->numCols; + } + + /* Generate Q if requested by user matrix */ + + if (pOutQ != NULL) + { + /* Initialize Q matrix to identity */ + memset(pOutQ->pData,0,sizeof(float32_t)*pOutQ->numRows*pOutQ->numRows); + + pa = pOutQ->pData; + for(col=0 ; col < pOutQ->numCols; col++) + { + *pa = 1.0f; + pa += pOutQ->numCols+1; + } + + nb = pOutQ->numRows - pOutQ->numCols + 1; + + pc = pOutTau + pOutQ->numCols - 1; + for(col=0 ; col < pOutQ->numCols; col++) + { + int32_t j,k, blkCnt, blkCnt2; + float32_t *pa0,*pa1,*pa2,*pa3,*ptemp; + float32_t temp; + float32x4_t v1,v2,vtemp; + + pos = pSrc->numRows - nb; + p = pOutQ->pData + pos + pOutQ->numCols*pos ; + + + COPY_COL_F32(pOutR,pos,pos,pTmpA); + pTmpA[0] = 1.0f; + pdst = pTmpB; + + /* v.T A(col:,col:) -> tmpb */ + + pv = pTmpA; + pa = p; + + temp = *pv; + blkCnt2 = (pOutQ->numRows-pos) >> 2; + while (blkCnt2 > 0) + { + v1 = vld1q_f32(pa); + v1 = vmulq_n_f32(v1, temp); + vst1q_f32(pdst,v1); + + pa += 4; + pdst += 4; + + blkCnt2--; + } + blkCnt2 = (pOutQ->numRows-pos) & 3; + if (blkCnt2 > 0) + { + mve_pred16_t p0 = vctp32q(blkCnt2); + + v1 = vld1q_f32(pa); + v1 = vmulq_n_f32(v1, temp); + vst1q_p_f32(pdst,v1,p0); + + pa += blkCnt2; 
+ } + + pa += pos; + pv++; + pdst = pTmpB; + pa0 = pa; + pa1 = pa0 + pOutQ->numRows; + pa2 = pa1 + pOutQ->numRows; + pa3 = pa2 + pOutQ->numRows; + + /* Unrolled loop */ + blkCnt = (pOutQ->numRows-pos - 1) >> 2; + k=1; + while(blkCnt > 0) + { + + vtemp = vld1q_f32(pv); + blkCnt2 = (pOutQ->numRows-pos) >> 2; + while (blkCnt2 > 0) + { + v1 = vld1q_f32(pdst); + + v2 = vld1q_f32(pa0); + v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,0)); + + v2 = vld1q_f32(pa1); + v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,1)); + + v2 = vld1q_f32(pa2); + v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,2)); + + v2 = vld1q_f32(pa3); + v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,3)); + + vst1q_f32(pdst,v1); + + pa0 += 4; + pa1 += 4; + pa2 += 4; + pa3 += 4; + pdst += 4; + + blkCnt2--; + } + blkCnt2 = (pOutQ->numRows-pos) & 3; + if (blkCnt2 > 0) + { + mve_pred16_t p0 = vctp32q(blkCnt2); + + v1 = vld1q_f32(pdst); + + v2 = vld1q_f32(pa0); + v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,0)); + + v2 = vld1q_f32(pa1); + v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,1)); + + v2 = vld1q_f32(pa2); + v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,2)); + + v2 = vld1q_f32(pa3); + v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,3)); + + vst1q_p_f32(pdst,v1,p0); + + pa0 += blkCnt2; + pa1 += blkCnt2; + pa2 += blkCnt2; + pa3 += blkCnt2; + + } + + pa0 += pos + 3*pOutQ->numRows; + pa1 += pos + 3*pOutQ->numRows; + pa2 += pos + 3*pOutQ->numRows; + pa3 += pos + 3*pOutQ->numRows; + pv += 4; + pdst = pTmpB; + k += 4; + blkCnt--; + } + + pa = pa0; + for(;knumRows-pos; k++) + { + temp = *pv; + blkCnt2 = (pOutQ->numRows-pos) >> 2; + while (blkCnt2 > 0) + { + v1 = vld1q_f32(pdst); + v2 = vld1q_f32(pa); + v1 = vfmaq_n_f32(v1, v2, temp); + vst1q_f32(pdst,v1); + + pdst += 4; + pa += 4; + + blkCnt2--; + } + blkCnt2 = (pOutQ->numRows-pos) & 3; + if (blkCnt2 > 0) + { + mve_pred16_t p0 = vctp32q(blkCnt2); + v1 = vld1q_f32(pdst); + v2 = vld1q_f32(pa); + v1 = vfmaq_n_f32(v1, v2, temp); + vst1q_p_f32(pdst,v1,p0); + + pa += blkCnt2; + } + + pa += pos; + pv++; 
+ pdst = pTmpB; + } + + pa = p; + beta = *pc--; + for(j=0;jnumRows-pos; j++) + { + float32_t f = -beta * pTmpA[j]; + ptemp = pTmpB; + + blkCnt2 = (pOutQ->numCols-pos) >> 2; + while (blkCnt2 > 0) + { + v1 = vld1q_f32(pa); + v2 = vld1q_f32(ptemp); + v1 = vfmaq_n_f32(v1,v2,f); + vst1q_f32(pa,v1); + + pa += 4; + ptemp += 4; + + blkCnt2--; + } + blkCnt2 = (pOutQ->numCols-pos) & 3; + if (blkCnt2 > 0) + { + mve_pred16_t p0 = vctp32q(blkCnt2); + + v1 = vld1q_f32(pa); + v2 = vld1q_f32(ptemp); + v1 = vfmaq_n_f32(v1,v2,f); + vst1q_p_f32(pa,v1,p0); + + pa += blkCnt2; + } + + pa += pos; + } + + + nb++; + } + } + + arm_status status = ARM_MATH_SUCCESS; + /* Return to application */ + return (status); +} + +#endif /*#if !defined(ARM_MATH_MVEF)*/ + + +#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/ + + + +#if (!defined(ARM_MATH_MVEF)) || defined(ARM_MATH_AUTOVECTORIZE) + +arm_status _arm_mat_qr_f32( + const arm_matrix_instance_f32 * pSrc, + const float32_t threshold, + arm_matrix_instance_f32 * pOutR, + arm_matrix_instance_f32 * pOutQ, + float32_t * pOutTau, + float32_t *pTmpA, + float32_t *pTmpB + ) + +{ + int32_t col=0; + int32_t nb,pos; + float32_t *pa,*pc; + float32_t beta; + float32_t *pv; + float32_t *pdst; + float32_t *p; + + if (pSrc->numRows < pSrc->numCols) + { + return(ARM_MATH_SIZE_MISMATCH); + } + + memcpy(pOutR->pData,pSrc->pData,pSrc->numCols * pSrc->numRows*sizeof(float32_t)); + pOutR->numCols = pSrc->numCols; + pOutR->numRows = pSrc->numRows; + + p = pOutR->pData; + + pc = pOutTau; + for(col=0 ; col < pSrc->numCols; col++) + { + int32_t i,j,k,blkCnt; + float32_t *pa0,*pa1,*pa2,*pa3; + COPY_COL_F32(pOutR,col,col,pTmpA); + + beta = arm_householder_f32(pTmpA,threshold,pSrc->numRows - col,pTmpA); + *pc++ = beta; + + pdst = pTmpB; + + /* v.T A(col:,col:) -> tmpb */ + pv = pTmpA; + pa = p; + for(j=0;jnumCols-col; j++) + { + *pdst++ = *pv * *pa++; + } + pa += col; + pv++; + pdst = pTmpB; + + pa0 = pa; + pa1 = pa0 + pSrc->numCols; + pa2 = pa1 + pSrc->numCols; + pa3 = pa2 
+ pSrc->numCols; + + /* Unrolled loop */ + blkCnt = (pSrc->numRows-col - 1) >> 2; + k=1; + while(blkCnt > 0) + { + float32_t sum; + + for(j=0;jnumCols-col; j++) + { + sum = *pdst; + + sum += pv[0] * *pa0++; + sum += pv[1] * *pa1++; + sum += pv[2] * *pa2++; + sum += pv[3] * *pa3++; + + *pdst++ = sum; + } + pa0 += col + 3*pSrc->numCols; + pa1 += col + 3*pSrc->numCols; + pa2 += col + 3*pSrc->numCols; + pa3 += col + 3*pSrc->numCols; + pv += 4; + pdst = pTmpB; + k += 4; + blkCnt--; + } + + pa = pa0; + for(;knumRows-col; k++) + { + for(j=0;jnumCols-col; j++) + { + *pdst++ += *pv * *pa++; + } + pa += col; + pv++; + pdst = pTmpB; + } + + /* A(col:,col:) - beta v tmpb */ + pa = p; + for(j=0;jnumRows-col; j++) + { + float32_t f = beta * pTmpA[j]; + + for(i=0;inumCols-col; i++) + { + *pa = *pa - f * pTmpB[i] ; + pa++; + } + pa += col; + } + + /* Copy Householder reflectors into R matrix */ + pa = p + pOutR->numCols; + for(k=0;knumRows-col-1; k++) + { + *pa = pTmpA[k+1]; + pa += pOutR->numCols; + } + + p += 1 + pOutR->numCols; + } + + /* Generate Q if requested by user matrix */ + + if (pOutQ != NULL) + { + /* Initialize Q matrix to identity */ + memset(pOutQ->pData,0,sizeof(float32_t)*pOutQ->numRows*pOutQ->numRows); + + pa = pOutQ->pData; + for(col=0 ; col < pOutQ->numCols; col++) + { + *pa = 1.0f; + pa += pOutQ->numCols+1; + } + + nb = pOutQ->numRows - pOutQ->numCols + 1; + + pc = pOutTau + pOutQ->numCols - 1; + for(col=0 ; col < pOutQ->numCols; col++) + { + int32_t i,j,k, blkCnt; + float32_t *pa0,*pa1,*pa2,*pa3; + pos = pSrc->numRows - nb; + p = pOutQ->pData + pos + pOutQ->numCols*pos ; + + + COPY_COL_F32(pOutR,pos,pos,pTmpA); + pTmpA[0] = 1.0f; + pdst = pTmpB; + + /* v.T A(col:,col:) -> tmpb */ + + pv = pTmpA; + pa = p; + for(j=0;jnumRows-pos; j++) + { + *pdst++ = *pv * *pa++; + } + pa += pos; + pv++; + pdst = pTmpB; + pa0 = pa; + pa1 = pa0 + pOutQ->numRows; + pa2 = pa1 + pOutQ->numRows; + pa3 = pa2 + pOutQ->numRows; + + /* Unrolled loop */ + blkCnt = (pOutQ->numRows-pos - 
1) >> 2; + k=1; + while(blkCnt > 0) + { + float32_t sum; + + for(j=0;jnumRows-pos; j++) + { + sum = *pdst; + + sum += pv[0] * *pa0++; + sum += pv[1] * *pa1++; + sum += pv[2] * *pa2++; + sum += pv[3] * *pa3++; + + *pdst++ = sum; + } + pa0 += pos + 3*pOutQ->numRows; + pa1 += pos + 3*pOutQ->numRows; + pa2 += pos + 3*pOutQ->numRows; + pa3 += pos + 3*pOutQ->numRows; + pv += 4; + pdst = pTmpB; + k += 4; + blkCnt--; + } + + pa = pa0; + for(;knumRows-pos; k++) + { + for(j=0;jnumRows-pos; j++) + { + *pdst++ += *pv * *pa++; + } + pa += pos; + pv++; + pdst = pTmpB; + } + + pa = p; + beta = *pc--; + for(j=0;jnumRows-pos; j++) + { + float32_t f = beta * pTmpA[j]; + + for(i=0;inumCols-pos; i++) + { + *pa = *pa - f * pTmpB[i] ; + pa++; + } + pa += pos; + } + + + nb++; + } + } + + arm_status status = ARM_MATH_SUCCESS; + /* Return to application */ + return (status); +} + +#endif /* end of test for Helium or Neon availability */ diff --git a/dsppp/tests/debug_test.cpp b/dsppp/tests/debug_test.cpp new file mode 100644 index 00000000..ed010152 --- /dev/null +++ b/dsppp/tests/debug_test.cpp @@ -0,0 +1,45 @@ +extern "C" { + extern void debug_test(); +} + +#include "allocator.h" + +#include +#include + +#include + +#include +#include "dsp/basic_math_functions.h" + + +using namespace arm_cmsis_dsp; + + + +extern Q15 external_debug(const PVector &a0, + const PVector &a1, + const PVector &a2, + const PVector &a3, + const PVector &b, + int l); + +template +static void test() +{ + + PrintType>(); +} + +void debug_test() +{ + title("Debug test"); + + + + test(); + + + + +} \ No newline at end of file diff --git a/dsppp/tests/debug_test_external.cpp b/dsppp/tests/debug_test_external.cpp new file mode 100644 index 00000000..795aaa53 --- /dev/null +++ b/dsppp/tests/debug_test_external.cpp @@ -0,0 +1,56 @@ +#include "allocator.h" + +#include +#include + +#include +#include + + + +#if defined(ARM_MATH_MVEI) || defined(ARM_MATH_MVEF) +Q15 external_debug(const PVector &a0, + const PVector &a1, + 
const PVector &a2, + const PVector &a3, + const PVector &b, + int l) +{ + int nb = l; + Q<33,30> acc0; + Q<33,30> acc1; + Q<33,30> acc2; + Q<33,30> acc3; + for(index_t i=0; i::mk(nb-i)); + acc1 = inner::vmacc(acc1,a1.vector_op_tail(i,nb-i),b.vector_op_tail(i,nb-i),inner::vctpq::mk(nb-i)); + acc2 = inner::vmacc(acc2,a2.vector_op_tail(i,nb-i),b.vector_op_tail(i,nb-i),inner::vctpq::mk(nb-i)); + acc3 = inner::vmacc(acc3,a3.vector_op_tail(i,nb-i),b.vector_op_tail(i,nb-i),inner::vctpq::mk(nb-i)); + } + Q15 r0,r1,r2,r3; + + r0 = inner::from_accumulator(acc0); + r1 = inner::from_accumulator(acc1); + r2 = inner::from_accumulator(acc2); + r3 = inner::from_accumulator(acc3); + + return(r0+r1+r2+r3); +} +#else +Q15 external_debug(const PVector &a0, + const PVector &a1, + const PVector &a2, + const PVector &a3, + const PVector &b, + int l) +{ + (void)a0; + (void)a1; + (void)a2; + (void)a3; + (void)b; + (void)l; + return(a0[0]); +} +#endif \ No newline at end of file diff --git a/dsppp/tests/dot_test.cpp b/dsppp/tests/dot_test.cpp new file mode 100644 index 00000000..53d878c7 --- /dev/null +++ b/dsppp/tests/dot_test.cpp @@ -0,0 +1,213 @@ +extern "C" { + extern void dot_test(); +} + +#include "allocator.h" + +#include +#include +#include + +#include + +#include + +#include "dsp/basic_math_functions.h" +#include "dsp/basic_math_functions_f16.h" + + + + + +template +static void complex_test(const T scale) +{ + std::cout << "----\r\n" << "N = " << NB << "\r\n"; + #if defined(STATIC_TEST) + PVector a; + PVector b; + PVector c; + PVector d; + + PVector res; + #else + PVector a(NB); + PVector b(NB); + PVector c(NB); + PVector d(NB); + + PVector res(NB); + #endif + + + init_array(a,NB); + init_array(b,NB); + init_array(c,NB); + init_array(d,NB); + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + O result = dot(scale*(a+b),c*d); + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + O ref; + PVector tmp1; + PVector tmp2; + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + 
cmsisdsp_dot_expr(a.const_ptr(), + b.const_ptr(), + c.const_ptr(), + d.const_ptr(), + tmp1.ptr(), + tmp2.ptr(), + scale, + ref,NB); + STOP_CYCLE_MEASUREMENT; + + if (!validate(result,ref)) + { + printf("dot expr failed \r\n"); + + } + + std::cout << "=====\r\n"; + +} + + +template +static void test() +{ + std::cout << "----\r\n" << "N = " << NB << "\r\n"; + #if defined(STATIC_TEST) + PVector a; + PVector b; + + PVector res; + #else + PVector a(NB); + PVector b(NB); + + PVector res(NB); + #endif + + init_array(a,NB); + init_array(b,NB); + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + O result = dot(a,b); + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + + O ref; + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + cmsisdsp_dot(a.const_ptr(),b.const_ptr(),ref,NB); + STOP_CYCLE_MEASUREMENT; + + if (!validate(result,ref)) + { + printf("dot failed \r\n"); + + } + + std::cout << "=====\r\n"; + +} + + +template +void all_dot_test() +{ + + const int nb_tails = TailForTests::tail; + const int nb_loops = TailForTests::loop; + + using ACC = typename number_traits::accumulator; + constexpr auto v = TestConstant::v; + + title("Dot product"); + + + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + if constexpr (!std::is_same::value) + { + test(); + } + + test(); + test(); + test(); + test(); + test(); + + + title("Dot product with expressions"); + + + complex_test(v); + complex_test(v); + complex_test(v); + complex_test(v); + complex_test(v); + complex_test(v); + + complex_test(v); + + complex_test(v); + complex_test(v); + complex_test(v); + if constexpr (!std::is_same::value) + { + complex_test(v); + } + + complex_test(v); + complex_test(v); + complex_test(v); + complex_test(v); + complex_test(v); + + //print_map("Stats",max_stats); + +} + +void dot_test() +{ +#if defined(DOT_TEST) + #if defined(F64_DT) + all_dot_test(); + #endif + #if defined(F32_DT) + all_dot_test(); + #endif + #if defined(F16_DT) && 
!defined(DISABLEFLOAT16) + all_dot_test(); + #endif + #if defined(Q31_DT) + all_dot_test(); + #endif + #if defined(Q15_DT) + all_dot_test(); + #endif + #if defined(Q7_DT) + all_dot_test(); + #endif +#endif +} diff --git a/dsppp/tests/filter_test.cpp b/dsppp/tests/filter_test.cpp new file mode 100644 index 00000000..da5bea2a --- /dev/null +++ b/dsppp/tests/filter_test.cpp @@ -0,0 +1,657 @@ +extern "C" { + extern void filter_test(); +} + +#include "allocator.h" + +#include +#include +#include +#include + +#include + +#include + + +#if defined(ARM_MATH_MVEI) || defined(ARM_MATH_MVEF) + +#define MVE_ASRL_SAT16(acc, shift) ((sqrshrl_sat48(acc, -(32-shift)) >> 32) & 0xffffffff) + + +#define FIR_Q15_CORE(pOutput, nbAcc, nbVecTaps, pSample, vecCoeffs) \ + for (int j = 0; j < nbAcc; j++) { \ + const q15_t *pSmp = &pSample[j]; \ + q63_t acc[4]; \ + \ + acc[j] = 0; \ + for (int i = 0; i < nbVecTaps; i++) { \ + vecIn0 = vld1q(pSmp + 8 * i); \ + acc[j] = vmlaldavaq(acc[j], vecIn0, vecCoeffs[i]); \ + } \ + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc[j], 15); \ + } + +#define FIR_Q15_MAIN_CORE() \ +{ \ + q15_t *pState = S->pState; /* State pointer */ \ + const q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ \ + q15_t *pStateCur; /* Points to the current sample of the state */ \ + const q15_t *pSamples; /* Temporary pointer to the sample buffer */ \ + q15_t *pOutput; /* Temporary pointer to the output buffer */ \ + const q15_t *pTempSrc; /* Temporary pointer to the source data */ \ + q15_t *pTempDest; /* Temporary pointer to the destination buffer */\ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */\ + int32_t blkCnt; \ + q15x8_t vecIn0; \ + \ + /* \ + * load coefs \ + */ \ + q15x8_t vecCoeffs[NBVECTAPS]; \ + \ + for (int i = 0; i < NBVECTAPS; i++) \ + vecCoeffs[i] = vldrhq_s16(pCoeffs + 8 * i); \ + \ + /* \ + * pState points to state array which contains previous frame (numTaps - 1) samples \ + * pStateCur points to the location where the new 
input data should be written \ + */ \ + pStateCur = &(pState[(numTaps - 1u)]); \ + pTempSrc = pSrc; \ + pSamples = pState; \ + pOutput = pDst; \ + \ + blkCnt = blockSize >> 2; \ + while (blkCnt > 0) { \ + /* \ + * Save 4 input samples in the history buffer \ + */ \ + vstrhq_s32(pStateCur, vldrhq_s32(pTempSrc)); \ + pStateCur += 4; \ + pTempSrc += 4; \ + \ + FIR_Q15_CORE(pOutput, 4, NBVECTAPS, pSamples, vecCoeffs); \ + pSamples += 4; \ + \ + blkCnt--; \ + } \ + \ + /* tail */ \ + int32_t residual = blockSize & 3; \ + \ + for (int i = 0; i < residual; i++) \ + *pStateCur++ = *pTempSrc++; \ + \ + FIR_Q15_CORE(pOutput, residual, NBVECTAPS, pSamples, vecCoeffs); \ + \ + /* \ + * Copy the samples back into the history buffer start \ + */ \ + pTempSrc = &pState[blockSize]; \ + pTempDest = pState; \ + \ + /* current compiler limitation */ \ + blkCnt = (numTaps - 1) >> 3; \ + while (blkCnt > 0) \ + { \ + vstrhq_s16(pTempDest, vldrhq_s16(pTempSrc)); \ + pTempSrc += 8; \ + pTempDest += 8; \ + blkCnt--; \ + } \ + blkCnt = (numTaps - 1) & 7; \ + if (blkCnt > 0) \ + { \ + mve_pred16_t p = vctp16q(blkCnt); \ + vstrhq_p_s16(pTempDest, vldrhq_z_s16(pTempSrc, p), p); \ + } \ +} + +static void arm_fir_q15_25_32_mve(const arm_fir_instance_q15 * S, + const q15_t * __restrict pSrc, + q15_t * __restrict pDst, uint32_t blockSize) +{ + #define NBTAPS 32 + #define NBVECTAPS (NBTAPS / 8) + FIR_Q15_MAIN_CORE(); + #undef NBVECTAPS + #undef NBTAPS +} + +static void arm_fir_q15_17_24_mve(const arm_fir_instance_q15 * S, + const q15_t * __restrict pSrc, + q15_t * __restrict pDst, uint32_t blockSize) +{ + #define NBTAPS 24 + #define NBVECTAPS (NBTAPS / 8) + FIR_Q15_MAIN_CORE(); + #undef NBVECTAPS + #undef NBTAPS +} + + +static void arm_fir_q15_9_16_mve(const arm_fir_instance_q15 * S, + const q15_t * __restrict pSrc, + q15_t * __restrict pDst, uint32_t blockSize) +{ + #define NBTAPS 16 + #define NBVECTAPS (NBTAPS / 8) + FIR_Q15_MAIN_CORE(); + #undef NBVECTAPS + #undef NBTAPS +} + +static void 
arm_fir_q15_1_8_mve(const arm_fir_instance_q15 * S, + const q15_t * __restrict pSrc, + q15_t * __restrict pDst, uint32_t blockSize) +{ + #define NBTAPS 8 + #define NBVECTAPS (NBTAPS / 8) + FIR_Q15_MAIN_CORE(); + #undef NBVECTAPS + #undef NBTAPS +} + + +void debug_arm_fir_q15( + const arm_fir_instance_q15 * S, + const q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize) +{ + q15_t *pState = S->pState; /* State pointer */ + const q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q15_t *pStateCur; /* Points to the current sample of the state */ + const q15_t *pSamples; /* Temporary pointer to the sample buffer */ + q15_t *pOutput; /* Temporary pointer to the output buffer */ + const q15_t *pTempSrc; /* Temporary pointer to the source data */ + q15_t *pTempDest; /* Temporary pointer to the destination buffer */ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + uint32_t blkCnt; + q15x8_t vecIn0; + uint32_t tapsBlkCnt = (numTaps + 7) / 8; + q63_t acc0, acc1, acc2, acc3; + + +int32_t nbTaps = (numTaps + 7) >> 3; + +switch(nbTaps) { + + case 1: + arm_fir_q15_1_8_mve(S, pSrc, pDst, blockSize); + return; + case 2: + arm_fir_q15_9_16_mve(S, pSrc, pDst, blockSize); + return; + case 3: + arm_fir_q15_17_24_mve(S, pSrc, pDst, blockSize); + return; + case 4: + arm_fir_q15_25_32_mve(S, pSrc, pDst, blockSize); + return; + } + /* + * pState points to state array which contains previous frame (numTaps - 1) samples + * pStateCur points to the location where the new input data should be written + */ + pStateCur = &(pState[(numTaps - 1u)]); + pTempSrc = pSrc; + pSamples = pState; + pOutput = pDst; + blkCnt = blockSize >> 2; + + while (blkCnt > 0U) + { + const q15_t *pCoeffsTmp = pCoeffs; + const q15_t *pSamplesTmp = pSamples; + + acc0 = 0LL; + acc1 = 0LL; + acc2 = 0LL; + acc3 = 0LL; + + /* + * Save 8 input samples in the history buffer + */ + vst1q(pStateCur, vld1q(pTempSrc)); + pStateCur += 8; + pTempSrc += 8; + + //INIT_SYSTICK; + 
//START_CYCLE_MEASUREMENT; + int i = tapsBlkCnt; + //startSectionNB(3); + while (i > 0) + { + /* + * load 8 coefs + */ + q15x8_t vecCoeffs = *(q15x8_t *) pCoeffsTmp; + + vecIn0 = vld1q(pSamplesTmp); + acc0 = vmlaldavaq(acc0, vecIn0, vecCoeffs); + + vecIn0 = vld1q(&pSamplesTmp[1]); + acc1 = vmlaldavaq(acc1, vecIn0, vecCoeffs); + + vecIn0 = vld1q(&pSamplesTmp[2]); + acc2 = vmlaldavaq(acc2, vecIn0, vecCoeffs); + + vecIn0 = vld1q(&pSamplesTmp[3]); + acc3 = vmlaldavaq(acc3, vecIn0, vecCoeffs); + + pSamplesTmp += 8; + pCoeffsTmp += 8; + /* + * Decrement the taps block loop counter + */ + i--; + } + //stopSectionNB(3); + //STOP_CYCLE_MEASUREMENT; + + + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc0, 15); + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc1, 15); + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc2, 15); + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc3, 15); + + pSamples += 4; + /* + * Decrement the sample block loop counter + */ + blkCnt--; + } + + uint32_t residual = blockSize & 3; + switch (residual) + { + case 3: + { + const q15_t *pCoeffsTmp = pCoeffs; + const q15_t *pSamplesTmp = pSamples; + + acc0 = 0LL; + acc1 = 0LL; + acc2 = 0LL; + + /* + * Save 8 input samples in the history buffer + */ + *(q15x8_t *) pStateCur = *(q15x8_t *) pTempSrc; + pStateCur += 8; + pTempSrc += 8; + + int i = tapsBlkCnt; + while (i > 0) + { + /* + * load 8 coefs + */ + q15x8_t vecCoeffs = *(q15x8_t *) pCoeffsTmp; + + vecIn0 = vld1q(pSamplesTmp); + acc0 = vmlaldavaq(acc0, vecIn0, vecCoeffs); + + vecIn0 = vld1q(&pSamplesTmp[2]); + acc1 = vmlaldavaq(acc1, vecIn0, vecCoeffs); + + vecIn0 = vld1q(&pSamplesTmp[4]); + acc2 = vmlaldavaq(acc2, vecIn0, vecCoeffs); + + pSamplesTmp += 8; + pCoeffsTmp += 8; + /* + * Decrement the taps block loop counter + */ + i--; + } + + acc0 = asrl(acc0, 15); + acc1 = asrl(acc1, 15); + acc2 = asrl(acc2, 15); + + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc0, 15); + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc1, 15); + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc2, 15); + } + break; + + case 2: + { 
+ const q15_t *pCoeffsTmp = pCoeffs; + const q15_t *pSamplesTmp = pSamples; + + acc0 = 0LL; + acc1 = 0LL; + /* + * Save 8 input samples in the history buffer + */ + vst1q(pStateCur, vld1q(pTempSrc)); + pStateCur += 8; + pTempSrc += 8; + + int i = tapsBlkCnt; + while (i > 0) + { + /* + * load 8 coefs + */ + q15x8_t vecCoeffs = *(q15x8_t *) pCoeffsTmp; + + vecIn0 = vld1q(pSamplesTmp); + acc0 = vmlaldavaq(acc0, vecIn0, vecCoeffs); + + vecIn0 = vld1q(&pSamplesTmp[2]); + acc1 = vmlaldavaq(acc1, vecIn0, vecCoeffs); + + pSamplesTmp += 8; + pCoeffsTmp += 8; + /* + * Decrement the taps block loop counter + */ + i--; + } + + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc0, 15); + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc1, 15); + } + break; + + case 1: + { + const q15_t *pCoeffsTmp = pCoeffs; + const q15_t *pSamplesTmp = pSamples; + + acc0 = 0LL; + + /* + * Save 8 input samples in the history buffer + */ + vst1q(pStateCur, vld1q(pTempSrc)); + pStateCur += 8; + pTempSrc += 8; + + int i = tapsBlkCnt; + while (i > 0) + { + /* + * load 8 coefs + */ + q15x8_t vecCoeffs = *(q15x8_t *) pCoeffsTmp; + + vecIn0 = vld1q(pSamplesTmp); + acc0 = vmlaldavaq(acc0, vecIn0, vecCoeffs); + + pSamplesTmp += 8; + pCoeffsTmp += 8; + /* + * Decrement the taps block loop counter + */ + i--; + } + + *pOutput++ = (q15_t) MVE_ASRL_SAT16(acc0, 15); + } + break; + + } + + /* + * Copy the samples back into the history buffer start + */ + pTempSrc = &pState[blockSize]; + pTempDest = pState; + + blkCnt = numTaps >> 3; + while (blkCnt > 0U) + { + vst1q(pTempDest, vld1q(pTempSrc)); + pTempSrc += 8; + pTempDest += 8; + blkCnt--; + } + blkCnt = numTaps & 7; + if (blkCnt > 0U) + { + mve_pred16_t p0 = vctp16q(blkCnt); + vstrhq_p_s16(pTempDest, vld1q(pTempSrc), p0); + } +} +#endif + +template +struct FirType; + +template<> +struct FirType +{ + typedef arm_fir_instance_f32 type; + static void init_state(type * S, + uint16_t numTaps, + const float32_t * pCoeffs, + float32_t * pState, + uint32_t blockSize) + { + 
arm_fir_init_f32(S,numTaps,pCoeffs,pState,blockSize); + }; + + static void init_coef(float32_t *coefs,uint16_t numTaps) + { + for(int i=0;i +struct FirType +{ + typedef arm_fir_instance_q15 type; + static void init_state(type * S, + uint16_t numTaps, + const Q15 * pCoeffs, + Q15 * pState, + uint32_t blockSize) + { + arm_fir_init_q15(S,numTaps, + reinterpret_cast(pCoeffs), + reinterpret_cast(pState),blockSize); + }; + + static void init_coef(Q15 *coefs,uint16_t numTaps) + { + for(int i=0;i +struct FIR { + + FIR(const PVector &coefs):coef_(coefs),state_(T{}) + {}; + + + PVector filter(const PVector &signal) + { + constexpr int UNROLL_FACTOR = 4; + PVector res(T{}); + using acc_type = typename number_traits::accumulator; + std::array accu; + index_t i=0; + +#if defined(ARM_COMPUTE_DISABLE_UNROLL) + #pragma clang loop unroll(disable) +#endif + for(;i<=BLOCK-UNROLL_FACTOR;i+=UNROLL_FACTOR) + { + + state_.sub(TAPS-1+i,TAPS-1+i+UNROLL_FACTOR) = copy(signal.sub(i,i+UNROLL_FACTOR)); + + //INIT_SYSTICK; + //START_CYCLE_MEASUREMENT; + //startSectionNB(2); + results(accu) = + dot(unroll( + [i,this](index_t k){return state_.sub(i+k,i+k+TAPS);}), + replicate(coef_) + ); + //stopSectionNB(2); + //STOP_CYCLE_MEASUREMENT; + + for(index_t k=0;k coef_; + PVector state_; +}; + +template +static void test() +{ + constexpr int NB = BLOCK; + std::cout << "----\r\n(" << BLOCK << "," << TAPS << ")\r\n"; + + typename FirType::type S; + PVector signal; + PVector coefs; + + FirType::init_coef(coefs.ptr(),TAPS); + + init_array(signal,NB); + FIR fir(coefs); + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + PVector res = fir.filter(signal); + //PVector res; + //fir.purec(signal.const_ptr(),res.ptr()); + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + + T* state; + T* coefsb; + state=(T*)malloc(sizeof(T)*(TAPS+BLOCK+BLOCK)); + coefsb=(T*)malloc(sizeof(T)*(TAPS+32)); + memset(coefsb,0,sizeof(T)*(TAPS+32)); + for(int i =0;i::init_state(&S,TAPS,coefsb,state,BLOCK); + PVector ref; 
+ //std::cout << "---\r\n"; + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + arm_fir_q15(&S, + reinterpret_cast(signal.const_ptr()), + reinterpret_cast(ref.ptr()),BLOCK); + STOP_CYCLE_MEASUREMENT; + + + if (!validate(res.const_ptr(),ref.const_ptr(),BLOCK)) + { + printf("fir failed \r\n"); + } + + free(state); + free(coefsb); + + +} + + + + +template +void all_filter_test() +{ + + title("FIR test"); + + + test(); + test(); + test(); + test(); + test(); + + test(); + test(); + test(); + test(); + test(); + + test(); + test(); + test(); + test(); + test(); + + + test(); + test(); + test(); + test(); + test(); + + +} + +void filter_test() +{ + //all_filter_test(); +} \ No newline at end of file diff --git a/dsppp/tests/fusion_test.cpp b/dsppp/tests/fusion_test.cpp new file mode 100644 index 00000000..0711cd1b --- /dev/null +++ b/dsppp/tests/fusion_test.cpp @@ -0,0 +1,247 @@ +extern "C" { + extern void fusion_test(); +} + +#include "allocator.h" + +#include + +#include +#include +#include +#include + +#include + +#include + +template +static void test() +{ + std::cout << "----\r\n" << "N = " << NB << "\r\n"; + + #if defined(STATIC_TEST) + PVector a; + PVector b; + PVector c; + #else + PVector a(NB); + PVector b(NB); + PVector c(NB); + #endif + + + init_array(a,NB); + init_array(b,NB); + init_array(c,NB); + + #if defined(STATIC_TEST) + PVector resa; + PVector resb; + #else + PVector resa(NB); + PVector resb(NB); + #endif + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + results(resa,resb) = Merged{a + b,a + c}; + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + PVector refa; + PVector refb; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + cmsisdsp_add(a.const_ptr(),b.const_ptr(),refa.ptr(),NB); + cmsisdsp_add(a.const_ptr(),c.const_ptr(),refb.ptr(),NB); + STOP_CYCLE_MEASUREMENT; + + if (!validate(resa.const_ptr(),refa.const_ptr(),NB)) + { + printf("add a failed \r\n"); + + } + + if (!validate(resb.const_ptr(),refb.const_ptr(),NB)) + { + printf("add b 
failed \r\n"); + + } + + std::cout << "=====\r\n"; +} + + +template +static void test2() +{ + std::cout << "----\r\n" << "N = " << NB << "\r\n"; + #if defined(STATIC_TEST) + PVector a; + PVector b; + PVector c; + #else + PVector a(NB); + PVector b(NB); + PVector c(NB); + #endif + using Acc = typename number_traits::accumulator; + + + init_array(a,NB); + init_array(b,NB); + init_array(c,NB); + + Acc resa,resb,refa,refb; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(2); + std::tie(resa,resb) = dot(Merged{expr(a),expr(a)}, + Merged{expr(b),expr(c)}); + stopSectionNB(2); + STOP_CYCLE_MEASUREMENT; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + cmsisdsp_dot(a.const_ptr(),b.const_ptr(),refa,NB); + cmsisdsp_dot(a.const_ptr(),c.const_ptr(),refb,NB); + STOP_CYCLE_MEASUREMENT; + + if (!validate(resa,refa)) + { + printf("dot a failed \r\n"); + + } + + if (!validate(resb,refb)) + { + printf("dot b failed \r\n"); + + } + + std::cout << "=====\r\n"; + + +} + +template +static void test3() +{ + std::cout << "----\r\n" << "N = " << NB << "\r\n"; + + constexpr int U = 2; + #if defined(STATIC_TEST) + PVector a[U]; + PVector b[U]; + #else + PVector a[U]={PVector(NB),PVector(NB)}; + PVector b[U]={PVector(NB),PVector(NB)}; + #endif + + using Acc = typename number_traits::accumulator; + + for(int i=0;i res; + Acc ref[U]; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(3); + results(res) = dot(unroll( + [&a](index_t k){return expr(a[k]);}), + unroll( + [&b](index_t k){return expr(b[k]);}) + ); + stopSectionNB(3); + STOP_CYCLE_MEASUREMENT; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + for(int i=0;i +void all_fusion_test() +{ + + const int nb_tails = TailForTests::tail; + const int nb_loops = TailForTests::loop; + + title("Vector Fusion"); + + test(); + test(); + test(); + test(); + test(); + test(); + + title("Dot Product Fusion"); + + test2(); + test2(); + test2(); + test2(); + test2(); + test2(); + + title("Unroll Fusion"); + + test3(); + test3(); + 
test3(); + test3(); + test3(); + test3(); +} + +void fusion_test() +{ +#if defined(FUSION_TEST) + #if defined(F64_DT) + all_fusion_test(); + #endif + #if defined(F32_DT) + all_fusion_test(); + #endif + #if defined(F16_DT) && !defined(DISABLEFLOAT16) + all_fusion_test(); + #endif + #if defined(Q31_DT) + all_fusion_test(); + #endif + #if defined(Q15_DT) + all_fusion_test(); + #endif + #if defined(Q7_DT) + all_fusion_test(); + #endif +#endif + +} \ No newline at end of file diff --git a/dsppp/tests/matrix_test.cpp b/dsppp/tests/matrix_test.cpp new file mode 100644 index 00000000..e18a827a --- /dev/null +++ b/dsppp/tests/matrix_test.cpp @@ -0,0 +1,1863 @@ +extern "C" { + extern void matrix_test(); +} + +#include "allocator.h" + +#include +#include +#include +#include + +#include + +#include +#include "boost/mp11.hpp" +using namespace boost::mp11; + + +extern "C" { +#include "dsp/matrix_functions.h" +#include "dsp/matrix_utils.h" +} + +template +struct MatTestConstant; + +template<> +struct MatTestConstant +{ + constexpr static double value = 0.001; + constexpr static double half = 0.5; +}; + +template<> +struct MatTestConstant +{ + constexpr static float value = 0.001f; + constexpr static float half = 0.5f; +}; + +#if !defined(DISABLEFLOAT16) +template<> +struct MatTestConstant +{ + constexpr static float16_t value = (float16_t)0.001f; + constexpr static float16_t half = (float16_t)0.5f; + +}; +#endif + +template<> +struct MatTestConstant +{ + constexpr static Q7 value = 0.001_q7; + constexpr static Q7 half = 0.5_q7; +}; + +template<> +struct MatTestConstant +{ + constexpr static Q15 value = 0.001_q15; + constexpr static Q15 half = 0.5_q15; +}; + +template<> +struct MatTestConstant +{ + constexpr static Q31 value = 0.001_q31; + constexpr static Q31 half = 0.5_q31; +}; + + +template +struct ErrThreshold +{ + constexpr static float abserr = 0; + constexpr static float relerr = 0; + constexpr static float abserr_cholesky = 0; + constexpr static float relerr_cholesky = 0; 
+ constexpr static float abserr_householder = 0; + constexpr static float relerr_householder = 0; + constexpr static float abserr_qr = 0; + constexpr static float relerr_qr = 0; + constexpr static float abserr_inv = 0; + constexpr static float relerr_inv = 0; +}; + +// Should be more accurate than F32 but right know +// we only check there is no regression compared to f32 +template<> +struct ErrThreshold +{ + constexpr static float abserr = ABS_ERROR; + constexpr static float relerr = REL_ERROR; + constexpr static float abserr_cholesky = 3e-4; + constexpr static float relerr_cholesky = 1e-4; + + constexpr static float abserr_householder = ABS_ERROR; + constexpr static float relerr_householder = REL_ERROR; + constexpr static float abserr_qr = ABS_ERROR; + constexpr static float relerr_qr = REL_ERROR; + + constexpr static float abserr_inv = ABS_ERROR; + constexpr static float relerr_inv = REL_ERROR; +}; + +template<> +struct ErrThreshold +{ + constexpr static float abserr = ABS_ERROR; + constexpr static float relerr = REL_ERROR; + constexpr static float abserr_cholesky = 3e-4; + constexpr static float relerr_cholesky = 1e-4; + + constexpr static float abserr_householder = ABS_ERROR; + constexpr static float relerr_householder = REL_ERROR; + constexpr static float abserr_qr = ABS_ERROR; + constexpr static float relerr_qr = REL_ERROR; + + constexpr static float abserr_inv = 4.0e-6; + constexpr static float relerr_inv = 5.0e-6; +}; + +#if !defined(DISABLEFLOAT16) +template<> +struct ErrThreshold +{ + constexpr static float abserr = ABS_ERROR; + constexpr static float relerr = REL_ERROR; + constexpr static float abserr_cholesky = 2e-1; + constexpr static float relerr_cholesky = 2e-1; + + constexpr static float abserr_householder = 2e-4; + constexpr static float relerr_householder = 2e-3; + // 32x32 is not numerically behaving well with + // the matrix used as input + constexpr static float abserr_qr = 2.0; + constexpr static float relerr_qr = 1e-2; + + constexpr static 
float abserr_inv = 3e-2; + constexpr static float relerr_inv = 3e-2; +}; +#endif + +void cmsisdsp_mat_inv(float64_t *amod, + float64_t* b, + uint32_t r,uint32_t c) +{ + arm_matrix_instance_f64 src; + arm_matrix_instance_f64 dst; + + + src.numRows = r; + src.numCols = c; + src.pData = amod; + + dst.numRows = r; + dst.numCols = c; + dst.pData = b; + + arm_status status = arm_mat_inverse_f64(&src,&dst); + (void)status; +}; + +void cmsisdsp_mat_inv(float32_t *amod, + float32_t* b, + uint32_t r,uint32_t c) +{ + arm_matrix_instance_f32 src; + arm_matrix_instance_f32 dst; + + + src.numRows = r; + src.numCols = c; + src.pData = amod; + + dst.numRows = r; + dst.numCols = c; + dst.pData = b; + + arm_status status = arm_mat_inverse_f32(&src,&dst); + (void)status; +}; + +#if !defined(DISABLEFLOAT16) +void cmsisdsp_mat_inv(float16_t *amod, + float16_t* b, + uint32_t r,uint32_t c) +{ + arm_matrix_instance_f16 src; + arm_matrix_instance_f16 dst; + + + src.numRows = r; + src.numCols = c; + src.pData = amod; + + dst.numRows = r; + dst.numCols = c; + dst.pData = b; + + arm_status status = arm_mat_inverse_f16(&src,&dst); + (void)status; +}; +#endif + +const float32_t mat64[64] = {0.395744, 0.623798, 0.885422, 0.95415, 0.310384, 0.257541, + 0.631426, 0.424491, 0.130945, 0.799959, 0.133693, 0.479455, + 0.519254, 0.381039, 0.617455, 0.748273, 0.146944, 0.928945, + 0.430936, 0.508207, 0.829023, 0.358027, 0.999501, 0.851953, + 0.273895, 0.685898, 0.0436612, 0.295212, 0.467651, 0.0515567, + 0.21037, 0.607475, 0.570295, 0.281109, 0.979219, 0.0947969, + 0.319016, 0.398405, 0.349953, 0.710002, 0.431597, 0.447659, + 0.0747669, 0.057063, 0.165648, 0.773106, 0.135765, 0.709327, + 0.873836, 0.292361, 0.00202529, 0.392942, 0.520183, 0.0528055, + 0.797982, 0.613497, 0.509682, 0.0435791, 0.780526, 0.960582, + 0.535914, 0.216113, 0.134108, 0.225859}; + +const float32_t mat16[16] = {1.0, 2.0, 3.0, 4.0, 2.0, 4.0, 5.0, 6.0, + 3.0, 5.0, 9.0, 10.0, 4.0, 6.0, 10.0, 16.0}; + +const float32_t mat256[256] = 
{0.97936, 0.498105, 0.452618, 0.299761, 0.688624, 0.247212, \ + 0.228337, 0.22905, 0.563815, 0.251998, 0.5238, 0.141223, 0.0980689, \ + 0.79112, 0.771182, 0.890995, 0.0256181, 0.0377277, 0.575629, \ + 0.648138, 0.926218, 0.803878, 0.620333, 0.325635, 0.587355, 0.041795, \ + 0.934271, 0.0690131, 0.0240136, 0.800828, 0.522999, 0.374706, \ + 0.266977, 0.208028, 0.112878, 0.0389899, 0.658311, 0.205067, \ + 0.244172, 0.0762778, 0.190575, 0.677312, 0.0682093, 0.367328, \ + 0.0191464, 0.988968, 0.437477, 0.130622, 0.907823, 0.0116559, \ + 0.614526, 0.447443, 0.0126975, 0.995496, 0.947676, 0.659996, \ + 0.321547, 0.725415, 0.658426, 0.0243924, 0.0843519, 0.351748, \ + 0.974332, 0.673381, 0.375012, 0.719626, 0.721219, 0.766905, \ + 0.17065, 0.648905, 0.770983, 0.360008, 0.344226, 0.179633, 0.347905, \ + 0.555561, 0.742615, 0.908389, 0.806959, 0.176078, 0.872167, \ + 0.321839, 0.098607, 0.954515, 0.627286, 0.235082, 0.746179, 0.163606, \ + 0.899323, 0.871471, 0.712448, 0.956971, 0.736687, 0.750702, 0.843348, \ + 0.302435, 0.444862, 0.0644597, 0.765519, 0.518397, 0.765541, \ + 0.900375, 0.201853, 0.490325, 0.721786, 0.893647, 0.774724, \ + 0.0983631, 0.339887, 0.526084, 0.0786152, 0.515697, 0.438801, \ + 0.226628, 0.125093, 0.886642, 0.617766, 0.71696, 0.473172, 0.640949, \ + 0.67688, 0.676214, 0.453662, 0.345796, 0.608999, 0.904448, 0.0965741, \ + 0.00461771, 0.467399, 0.292235, 0.0418646, 0.116632, 0.0766192, \ + 0.269051, 0.411649, 0.0538381, 0.973959, 0.667106, 0.301662, \ + 0.977206, 0.891751, 0.420267, 0.441334, 0.0896179, 0.249969, \ + 0.672614, 0.623966, 0.609733, 0.320772, 0.39723, 0.845196, 0.653877, \ + 0.0599186, 0.340188, 0.199787, 0.598104, 0.45664, 0.920485, 0.969439, \ + 0.446555, 0.0932837, 0.0247635, 0.747644, 0.438759, 0.639154, \ + 0.754049, 0.379433, 0.968655, 0.0452146, 0.208123, 0.252654, \ + 0.261898, 0.608665, 0.145211, 0.395368, 0.799111, 0.697823, \ + 0.382906, 0.456515, 0.262579, 0.284169, 0.881488, 0.860877, 0.155548, \ + 0.537387, 0.804235, 
0.311383, 0.183216, 0.677692, 0.829542, 0.406049, \ + 0.860392, 0.467668, 0.385633, 0.654692, 0.841125, 0.178406, \ + 0.668945, 0.369609, 0.809711, 0.454593, 0.632028, 0.605791, 0.643851, \ + 0.787023, 0.285633, 0.832216, 0.30892, 0.303559, 0.704898, 0.61118, \ + 0.435547, 0.173678, 0.788689, 0.319511, 0.648378, 0.635417, 0.125127, \ + 0.310251, 0.800819, 0.4863, 0.924361, 0.308059, 0.952175, 0.449844, \ + 0.215496, 0.257826, 0.556383, 0.259735, 0.197234, 0.0509903, 0.21474, \ + 0.145085, 0.41288, 0.876758, 0.096721, 0.228955, 0.0152248, 0.126501, \ + 0.28899, 0.336668, 0.580015, 0.932761, 0.989783, 0.667379, \ + 0.798751, 0.587173, 0.445902, 0.041448, 0.311878, 0.0332857, \ + 0.401984, 0.795049, 0.8222, 0.678648, 0.807558}; + +template typename A> +void init_mat(Matrix &pDst,std::size_t r,std::size_t c) +{ + const float32_t *p; + if ((r==4) && (r==c)) + { + p = mat16; + } + + if ((r==8) && (r==c)) + { + p = mat64; + } + + if ((r==16) && (r==c)) + { + p = mat256; + } + + + for(std::size_t i=0;i typename A, + typename M> +void _matinv(const Matrix &a,M && res) +{ + + Matrix b = a; + + const vector_length_t nb_rows = a.rows(); + const vector_length_t nb_cols = a.columns(); + + + for(index_t r=0;r < nb_rows ; r++) + { + res.row(r) = T{}; + res(r,r) = number_traits::one(); + } + + + for(index_t c=0;c < nb_cols ; c++) + { + T pivot = b(c,c); + index_t selectedRow = c; + + + for(index_t r=c+1;r < nb_rows ; r++) + { + T newPivot = b(r,c); + if (_abs(newPivot)>_abs(pivot)) + { + pivot = newPivot; + selectedRow = r; + } + } + + if ((pivot!=T{}) && (selectedRow != c)) + { + swap(b.row(c,c),b.row(selectedRow,c)); + swap(res.row(c),res.row(selectedRow)); + } + else if (pivot == T{}) + { + break; + } + + pivot = number_traits::one() / pivot; + + b.row(c,c) *= pivot; + res.row(c) *= pivot; + + index_t r=0; + + for(;r < c ; r++) + { + const T tmp = b(r,c); + b.row(r,c) -= b.row(c,c)*tmp; + res.row(r) -= res.row(c)*tmp; + } + + for(r=c+1;r < nb_rows ; r++) + { + const T tmp = 
b(r,c); + b.row(r,c) -= b.row(c,c)*tmp; + res.row(r) -= res.row(c)*tmp; + } + + } + + +} + +template typename A, + typename std::enable_if<(NB>0),bool>::type = true> +Matrix matinv(const Matrix &a) +{ + Matrix res; + _matinv(a,res); + return(res); +} + +template typename A, + typename std::enable_if<(NB<0),bool>::type = true> +Matrix matinv(const Matrix &a) +{ + Matrix res(a.rows(),a.columns()); + return (_matinv(a,res)); + return(res); +} + +template typename A, + typename std::enable_if<(NB<0),bool>::type = true> +void matinv(Matrix &res, const Matrix &a) +{ + (void)_matinv(a,res); +} + + +template +void testinv() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + + #if defined(STATIC_TEST) + PMat a; + #else + PMat a(R,C); + #endif + + init_mat(a,R,C); + + #if !defined(STATIC_TEST) + PMat res(R,C); + #endif + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + #if defined(STATIC_TEST) + PMat res = matinv(a); + #else + matinv(res,a); + #endif + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + PMat amod(a); + PMat cmsis_res(R,C); + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + cmsisdsp_mat_inv(amod.ptr(),cmsis_res.ptr(),R,C); + STOP_CYCLE_MEASUREMENT; + + + + if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R*C, + ErrThreshold::abserr_inv,ErrThreshold::relerr_inv)) + { + printf("inv failed \r\n"); + + } + + std::cout << "=====\r\n"; + +} + +template +void testadd() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + + #if defined(STATIC_TEST) + PMat a; + PMat b; + #else + PMat a(R,C); + PMat b(R,C); + #endif + + init_array(a,R*C); + init_array(b,R*C); + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + #if defined(STATIC_TEST) + PMat res = a+b; + #else + PMat res = a+b; + #endif + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + //PrintType(); + //PrintType(); +// + //std::cout << "a: " << IsVector::value << "\r\n"; + //std::cout << "b: " << IsVector::value << "\r\n"; + //std::cout << 
"a+b: " << IsVector::value << "\r\n"; + //std::cout << "res: " << IsVector::value << "\r\n"; + //std::cout << "same: " << SameElementType::value << "\r\n"; +// + //std::cout << "vec inst: " << has_vector_inst() << "\r\n"; + //std::cout << "vec index pair: " << vector_idx_pair() << "\r\n"; + //std::cout << "must use mat idx: " << must_use_matrix_idx_pair() << "\r\n"; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #if defined(STATIC_TEST) + PMat cmsis_res; + #else + PMat cmsis_res(R,C); + #endif + cmsisdsp_mat_add(a.const_ptr(),b.const_ptr(),cmsis_res.ptr(),R,C); + STOP_CYCLE_MEASUREMENT; + + + + if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R*C, + ErrThreshold::abserr,ErrThreshold::relerr)) + { + printf("add failed \r\n"); + } + + std::cout << "=====\r\n"; + +} + + +template +void testdiag() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + #if defined(STATIC_TEST) + PVector a; + #else + PVector a(R); + #endif + init_array(a,R); + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + #if defined(STATIC_TEST) + PMat res=PMat::diagonal(a); + #else + PMat res=PMat::diagonal(a); + #endif + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + + const T* ap = a.const_ptr(); + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #if defined(STATIC_TEST) + PMat cmsis_res; + #else + PMat cmsis_res(R,C); + #endif + T* refp = cmsis_res.ptr(); + + UNROLL_LOOP + for(index_t row=0;row < R; row++) + { + UNROLL_LOOP + for(index_t col=0;col < C; col++) + { + if (row != col) + { + refp[row*C+col] = T{}; + } + else + { + refp[row*C+col] = ap[row]; + } + } + } + STOP_CYCLE_MEASUREMENT; + + + + if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R*C, + ErrThreshold::abserr,ErrThreshold::relerr)) + { + printf("diag failed \r\n"); + } + + std::cout << "=====\r\n"; +} + + + +template +void testouter() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + + PVector a; + PVector b; + init_array(a,R); + init_array(b,C); + + b = b + b; + + 
INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + PMat res = outer(a,b); + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(2); + #if defined(STATIC_TEST) + PMat cmsis_res; + #else + PMat cmsis_res(R,C); + #endif + CMSISOuter::run(a.const_ptr(),b.const_ptr(),cmsis_res.ptr(),R,C); + startSectionNB(2); + STOP_CYCLE_MEASUREMENT; + + //std::cout<::abserr,ErrThreshold::relerr)) + { + printf("outer failed \r\n"); + } + + std::cout << "=====\r\n"; + +} + +template +void testview() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + + #if defined(STATIC_TEST) + PVector a; + #else + PVector a(R); + #endif + init_array(a,R); + + #if defined(STATIC_TEST) + PMat res=PMat::diagonal(a); + #else + PMat res=PMat::diagonal(a); + #endif + //std::cout << res; + constexpr int subsize = 8; + constexpr int subpos = 8; + auto r = res.sub(Slice(subpos,subpos+subsize),Slice(subpos,subpos+subsize)); + + #if defined(STATIC_TEST) + PMat resb; + #else + PMat resb(subsize,subsize); + #endif + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + resb = r+r; + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + //std::cout << IsMatrix::value << "\r\n"; + + PMat cmsis_res; + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(2); + DISABLE_LOOP_UNROLL + for(index_t row=0;row < subsize ; row++) + { + DISABLE_LOOP_UNROLL + for(index_t col=0;col < subsize ; col++) + { + cmsis_res(row,col) = r(row,col)+r(row,col); + } + } + startSectionNB(2); + STOP_CYCLE_MEASUREMENT; + + //std::cout<::abserr,ErrThreshold::relerr)) + { + printf("sub matrix failed \r\n"); + } + + std::cout << "=====\r\n"; + +} + + + +template +void testmatvec() +{ + + using STO = typename vector_traits::storage_type; + + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + + #if defined(STATIC_TEST) + PVector a; + #else + PVector a(C); + #endif + init_array(a,C); + + #if defined(STATIC_TEST) + PMat m; + #else 
+ PMat m(R,C); + #endif + init_array(m,R*C); + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + #if defined(STATIC_TEST) + PVector res = dot(m,a); + #else + PVector res = dot(m,a); + #endif + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + //std::cout << IsMatrix::value << "\r\n"; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #if defined(STATIC_TEST) + PVector cmsis_res; + #else + PVector cmsis_res(R); + #endif + typename CMSISMatrixType::type S; + S.numRows = R; + S.numCols = C; + S.pData = reinterpret_cast(const_cast(m.ptr())); + + + startSectionNB(2); + cmsis_mat_vec_mult(&S, a.const_ptr(), cmsis_res.ptr()); + startSectionNB(2); + STOP_CYCLE_MEASUREMENT; + + //std::cout << cmsis_res; + + if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R, + ErrThreshold::abserr,ErrThreshold::relerr)) + { + printf("matrix times vector failed \r\n"); + } + std::cout << "=====\r\n"; + +} + +template +void testcomplexmatvec() +{ + const T scalar = MatTestConstant::half; + using STO = typename vector_traits::storage_type; + + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + + #if defined(STATIC_TEST) + PVector a; + PVector b; + #else + PVector a(C); + PVector b(C); + #endif + init_array(a,C); + init_array(b,C); + + #if defined(STATIC_TEST) + PMat m; + #else + PMat m(R,C); + #endif + init_array(m,R*C); + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + #if defined(STATIC_TEST) + PVector tmpv = a + b * scalar; + PVector res = dot(m,tmpv); + #else + PVector tmpv = a + b * scalar; + PVector res = dot(m,tmpv); + #endif + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + //std::cout << IsMatrix::value << "\r\n"; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #if defined(STATIC_TEST) + PVector cmsis_res; + PVector tmp; + #else + PVector cmsis_res(R); + PVector tmp(C); + #endif + typename CMSISMatrixType::type S; + S.numRows = R; + S.numCols = C; + S.pData = reinterpret_cast(const_cast(m.ptr())); + + + startSectionNB(2); + 
cmsis_complex_mat_vec(&S, + a.const_ptr(), + b.const_ptr(), + scalar, + tmp.ptr(), + cmsis_res.ptr()); + startSectionNB(2); + STOP_CYCLE_MEASUREMENT; + + + + //std::cout << cmsis_res; + + if (!validate(res.const_ptr(),cmsis_res.const_ptr(),R, + ErrThreshold::abserr,ErrThreshold::relerr)) + { + printf("matrix times vector expression failed \r\n"); + } + + std::cout << "=====\r\n"; + +} + + +template +void testmatmult() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << K << " x " << C << "\r\n"; + + using S = typename CMSISMatrixType::scalar; + + #if defined(STATIC_TEST) + PMat ma; + #else + PMat ma(R,K); + #endif + init_array(ma,R*K); + + #if defined(STATIC_TEST) + PMat mb; + #else + PMat mb(K,C); + #endif + init_array(mb,K*C); + + + + mb += TestConstant::small; + + //std::cout << ma; + //std::cout << mb; + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + #if defined(STATIC_TEST) + PMat res = dot(ma,mb); + #else + PMat res = dot(ma,mb); + #endif + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + //PrintType(); + //PrintType(); + //std::cout << ma; + //std::cout << mb; + //std::cout << res; + + + //std::cout << IsMatrix::value << "\r\n"; + + PMat tmp(C,K); + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #if defined(STATIC_TEST) + PMat cmsis_res; + #else + PMat cmsis_res(R,C); + #endif + + + typename CMSISMatrixType::type SA; + SA.numRows = R; + SA.numCols = K; + SA.pData = reinterpret_cast(ma.ptr()); + + typename CMSISMatrixType::type SB; + SB.numRows = K; + SB.numCols = C; + SB.pData = reinterpret_cast(mb.ptr()); + + typename CMSISMatrixType::type RES; + RES.numRows = R; + RES.numCols = C; + RES.pData = reinterpret_cast(cmsis_res.ptr()); + + + startSectionNB(2); + cmsis_mat_mult(&SA, &SB, &RES,reinterpret_cast(tmp.ptr())); + startSectionNB(2); + STOP_CYCLE_MEASUREMENT; + + + //std::cout << cmsis_res; + + if (!validate(res,cmsis_res, + ErrThreshold::abserr,ErrThreshold::relerr)) + { + printf("matrix times matrix expression failed \r\n"); + } 
+ + std::cout << "=====\r\n"; + +} + +template +void testsubmatmult() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << K << " x " << C << "\r\n"; + + using S = typename CMSISMatrixType::scalar; + constexpr int TOTALA = 4 + 2*K + 2*R + K*R; + constexpr int TOTALB = 4 + 2*C + 2*K + C*K; + + #if defined(STATIC_TEST) + PMat ma; + #else + PMat ma(R+2,K+2); + #endif + init_array(ma,TOTALA); + + #if defined(STATIC_TEST) + PMat mb; + #else + PMat mb(K+2,C+2); + #endif + init_array(mb,TOTALB); + + + + mb += MatTestConstant::value; + + //std::cout << ma; + //std::cout << mb; + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #if defined(STATIC_TEST) + PMat res(T{}); + #else + PMat res(R,C,T{}); + #endif + startSectionNB(1); + res.sub(Slice(0,R),Slice(0,C)) = copy(dot(ma.sub(Slice(0,R),Slice(0,K)),mb.sub(Slice(0,K),Slice(0,C)))); + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + //PrintType(); + //PrintType(); + //std::cout << ma; + //std::cout << mb; + //std::cout << res; + + + //std::cout << IsMatrix::value << "\r\n"; + + PMat cmsis_res(R,C); + PMat cmsis_ma(R,K); + PMat cmsis_mb(K,C); + PMat tmp(C,K); + + typename CMSISMatrixType::type SA; + SA.numRows = R; + SA.numCols = K; + SA.pData = reinterpret_cast(cmsis_ma.ptr()); + + typename CMSISMatrixType::type SB; + SB.numRows = K; + SB.numCols = C; + SB.pData = reinterpret_cast(cmsis_mb.ptr()); + + typename CMSISMatrixType::type RES; + RES.numRows = R; + RES.numCols = C; + RES.pData = reinterpret_cast(cmsis_res.ptr()); + + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(2); + cmsis_ma = copy(ma.sub(Slice(0,R),Slice(0,K))); + cmsis_mb = copy(mb.sub(Slice(0,K),Slice(0,C))); + cmsis_mat_mult(&SA, &SB, &RES,reinterpret_cast(tmp.ptr())); + startSectionNB(2); + STOP_CYCLE_MEASUREMENT; + + + //std::cout << cmsis_res; + + if (!validate(res.sub(Slice(0,R),Slice(0,C)),cmsis_res, + ErrThreshold::abserr,ErrThreshold::relerr)) + { + printf("matrix times matrix expression failed \r\n"); + } + + + std::cout << 
"=====\r\n"; +} + + +template +void testmattranspose() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << C << "\r\n"; + + #if defined(STATIC_TEST) + PMat ma; + #else + PMat ma(R,C); + #endif + init_array(ma,R*C); + + + //PrintType(); + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + #if defined(STATIC_TEST) + PMat res = ma.transpose(); + #else + PMat res = ma.transpose(); + #endif + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + //std::cout << IsMatrix::value << "\r\n"; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #if defined(STATIC_TEST) + PMat cmsis_res; + #else + PMat cmsis_res(C,R); + #endif + + typename CMSISMatrixType::type SA; + SA.numRows = R; + SA.numCols = C; + SA.pData = reinterpret_cast::scalar*>(ma.ptr()); + + typename CMSISMatrixType::type RES; + RES.numRows = C; + RES.numCols = R; + RES.pData = reinterpret_cast::scalar*>(cmsis_res.ptr()); + + + startSectionNB(2); + cmsis_mat_trans(&SA, &RES); + startSectionNB(2); + STOP_CYCLE_MEASUREMENT; + + //std::cout << cmsis_res; + + if (!validate(res,cmsis_res, + ErrThreshold::abserr,ErrThreshold::relerr)) + { + printf("matrix transpose failed \r\n"); + } + + + std::cout << "=====\r\n"; +} + + +#if !defined(DISABLEFLOAT16) +static float16_t _gen_sqrt(const float16_t v) +{ + return((float16_t)sqrtf(v)); +} +#endif + +static float32_t _gen_sqrt(const float32_t v) +{ + return(sqrtf(v)); +} + +static float64_t _gen_sqrt(const float64_t v) +{ + return(sqrt(v)); +} + +template typename A, + typename V,typename T> +inline T _householder(Vector &res,const V&v,const T eps) +{ + T alpha = v[0]; + T tau; + T beta; + if (v.length()==1) + { + res[0] = T{}; + return(T{}); + } + T xnorm2 = dot(v.sub(1),v.sub(1)); + + //std::cout << xnorm2 << "\r\n"; + if (xnorm2 <= eps) + { + tau = T{}; + res = T{}; + } + else + { + if (alpha<=0) + { + beta = _gen_sqrt(alpha*alpha+xnorm2); + } + else + { + beta = -_gen_sqrt(alpha*alpha+xnorm2); + } + T r = number_traits::one() / (alpha - beta); + res = v * r; + tau 
= (beta - alpha)/beta; + res[0] = number_traits::one(); + } + return(tau); +} + +template::value && + SameElementType::value && + IsVector::value,bool>::type = true> +auto householder(const V&v,const T threshold) +{ + constexpr int NB = StaticLength::value; + Vector res; + T beta = _householder(res,v,threshold); + return std::tuple>(beta,res); +} + +template::value && + SameElementType::value && + IsVector::value,bool>::type = true> +auto householder(const V&v,const T threshold) +{ + Vector res(v.length()); + T beta = _householder(res,v,threshold); + return std::tuple>(beta,res); +} + +template::value && + SameElementType::value && + IsVector::value,bool>::type = true> +auto householder(const V&v,const T threshold,TMP &res) +{ + T beta = _householder(res,v,threshold); + return beta; +} + +template +struct HouseholderThreshold; + +#if !defined(DISABLEFLOAT16) +template<> +struct HouseholderThreshold +{ + static constexpr float16_t value = DEFAULT_HOUSEHOLDER_THRESHOLD_F16; +}; +#endif + +template<> +struct HouseholderThreshold +{ + static constexpr float64_t value = DEFAULT_HOUSEHOLDER_THRESHOLD_F64; +}; + + +template<> +struct HouseholderThreshold +{ + static constexpr float32_t value = DEFAULT_HOUSEHOLDER_THRESHOLD_F32; +}; + + +template +static void testHouseholder() +{ + std::cout << "----\r\n" << "N = " << NB << "\r\n"; + #if defined(STATIC_TEST) + PVector a; + #else + PVector a(NB); + #endif + + cmsis_init_householder(a.ptr(),NB); + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + auto res = householder(a,HouseholderThreshold::value); + //PVector res;// = a + b; + //float res_beta=0; + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #if defined(STATIC_TEST) + PVector ref; + #else + PVector ref(NB); + #endif + T ref_beta = cmsis_householder(a.const_ptr(),ref.ptr(),NB); + STOP_CYCLE_MEASUREMENT; + + if (!validate(std::get<1>(res).const_ptr(),ref.const_ptr(),NB, + 
ErrThreshold::abserr_householder,ErrThreshold::relerr_householder)) + { + printf("householder vector failed \r\n"); + } + + + if (!validate(std::get<0>(res),ref_beta, + ErrThreshold::abserr_householder,ErrThreshold::relerr_householder)) + { + printf("householder beta failed \r\n"); + } + std::cout << "=====\r\n"; +} + +#include "debug_mat.h" + +#if 1 +// R >= C +template typename A> +auto QR(const Matrix&m,const T eps,bool wantQ) +{ + #if defined(STATIC_TEST) + Vector tau; + Matrix RM = m; + Matrix Q = Matrix::identity(); + + + // Temporaries + Vector tmpvec; + Matrix tmpmat; + #else + Vector tau(m.columns()); + Matrix RM = m; + Matrix Q = Matrix::identity(m.rows()); + + + // Temporaries + Vector tmpvec(m.rows()); + Matrix tmpmat(1,m.rows()); + #endif + + const int NBC = m.columns(); + const int NBR = m.rows(); + + + for(index_t c=0;c vt(tmpvec,1,NBR-c); + dot(tmpmat.sub(0,1,0,NBC-c),vt,RM.sub(c,c)); + + RM.sub(c,c) = + RM.sub(c,c) - beta * outer(tmpvec.sub(0,NBR-c),tmpmat.row(0,0,NBC-c)); + + // Copy householder reflector + // Not valid when c == C-1 + // We don't want to use a test since CMSIS-DSP is not using + // one and introducing a test would give worse performance + RM.col(c,c+1) = copy(tmpvec.sub(1,NBR-c)); + + } + + + auto beta = householder(RM.col(NBC-1,NBC-1),eps,tmpvec); + tau[NBC-1] = beta; + + MatrixView vt(tmpvec,1,NBR-(NBC-1)); + dot(tmpmat.sub(0,1,0,NBC-(NBC-1)),vt,RM.sub(NBC-1,NBC-1)); + + RM.sub(NBC-1,NBC-1) = + RM.sub(NBC-1,NBC-1) - beta * outer(tmpvec.sub(0,NBR-(NBC-1)),tmpmat.row(0,0,NBC-(NBC-1))); + + + + + if (wantQ) + { + for(index_t c=NBC-1;c>=0;c--) + { + tmpvec.sub(1) = copy(RM.col(c,c+1)); + tmpvec[0] = number_traits::one(); + + MatrixView vt(tmpvec,1,NBR-c); + dot(tmpmat.sub(0,1,0,NBR-c),vt,Q.sub(c,c)); + + Q.sub(c,c) = + Q.sub(c,c) - tau[c] * outer(tmpvec.sub(0,NBR-c),tmpmat.row(0,0,NBR-c)); + + } + } + + return std::make_tuple(RM,Q,tau); + +} + +template +static void testQR() +{ + std::cout << "----\r\n"; + std::cout << R << " x " 
<< C << "\r\n"; + #if defined(STATIC_TEST) + PMat a; + #else + PMat a(R,C); + #endif + + cmsis_init_qr(a.ptr(),R,C); + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + auto res = QR(a,HouseholderThreshold::value,true); + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + //std::cout << "next\r\n"; + + //std::cout << std::get<0>(res); + //std::cout << std::get<1>(res); + //std::cout << std::get<2>(res); + + // For fair comparison, in dynamic mode we must take into + // account the memory allocations since they are made + // by the QR algorithms + #if !defined(STATIC_TEST) + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #endif + + #if 0 //defined(STATIC_TEST) + PMat cmsis_res; + PMat cmsis_outRp; + PMat cmsis_outQp; + PVector cmsis_tau; + PVector cmsis_tmpa; + PVector cmsis_tmpb; + #else + PMat cmsis_res(R,C); + PMat cmsis_outRp(R,C); + PMat cmsis_outQp(R,R); + PVector cmsis_tau(C); + PVector cmsis_tmpa(R); + PVector cmsis_tmpb(C); + #endif + + typename CMSISMatrixType::type RP; + RP.numRows = R; + RP.numCols = C; + RP.pData = cmsis_outRp.ptr(); + + typename CMSISMatrixType::type QP; + QP.numRows = R; + QP.numCols = R; + QP.pData = cmsis_outQp.ptr(); + + typename CMSISMatrixType::type IN; + IN.numRows = R; + IN.numCols = C; + IN.pData = a.ptr(); + + //std::cout << "-------\r\n"; + + #if defined(STATIC_TEST) + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + #endif + arm_status status=cmsis_qr(&IN,HouseholderThreshold::value, + &RP,&QP, + cmsis_tau.ptr(), + cmsis_tmpa.ptr(), + cmsis_tmpb.ptr()); + (void)status; + STOP_CYCLE_MEASUREMENT; + + //std::cout << cmsis_outRp; + //std::cout << cmsis_outQp; + //std::cout << cmsis_tau; + + if (!validate(std::get<0>(res),cmsis_outRp, + ErrThreshold::abserr_qr,ErrThreshold::relerr_qr)) + { + printf("QR Rp matrix failed \r\n"); + } + + + if (!validate(std::get<1>(res),cmsis_outQp, + ErrThreshold::abserr_qr,ErrThreshold::relerr_qr)) + { + printf("QR Qp matrix failed \r\n"); + } + + if (!validate(std::get<2>(res),cmsis_tau, + 
ErrThreshold::abserr_qr,ErrThreshold::relerr_qr)) + { + printf("QR tau failed \r\n"); + } + std::cout << "=====\r\n"; +} + +#endif + + +template typename A> +auto cholesky(const Matrix&a) +{ + // Temporaries + #if defined(STATIC_TEST) + Matrix g = a; + Vector tmp; + #else + Matrix g = a; + Vector tmp(a.rows()); + #endif + + const int NBR = a.rows(); + + g.col(0,0) = g.col(0,0) * (T)(number_traits::one() / _gen_sqrt(g(0,0))); + + for(int j=1;j::one() / _gen_sqrt(g(j,j)- tmp[j])); + + } + return(g); +} + + +template +static void testCholesky() +{ + std::cout << "----\r\n"; + std::cout << R << " x " << R << "\r\n"; + #if defined(STATIC_TEST) + PMat a; + #else + PMat a(R,R); + #endif + + cmsis_init_cholesky(a.ptr(),R,R); + + //std::cout << a; + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + startSectionNB(1); + // Not totally equivalent to CMSIS implementation + // It should be possible to rewrite it to avoid use of + // temporary buffer like CMSIS-DSP + auto res = cholesky(a); + stopSectionNB(1); + STOP_CYCLE_MEASUREMENT; + + + //std::cout << res; + + PMat cmsis_res(T{}); + + typename CMSISMatrixType::type OUT; + OUT.numRows = R; + OUT.numCols = R; + OUT.pData = cmsis_res.ptr(); + + + typename CMSISMatrixType::type IN; + IN.numRows = R; + IN.numCols = R; + IN.pData = a.ptr(); + + //std::cout << "-------\r\n"; + + + INIT_SYSTICK; + START_CYCLE_MEASUREMENT; + arm_status status=cmsis_cholesky(&IN,&OUT); + (void)status; + STOP_CYCLE_MEASUREMENT; + + //std::cout << cmsis_res; + + + if (!validateLT(res,cmsis_res, + ErrThreshold::abserr_cholesky,ErrThreshold::relerr_cholesky)) + { + printf("cholesky failed \r\n"); + } + std::cout << "=====\r\n"; +} + +template +struct TESTINV +{ + static void all() + { + testinv(); + } +}; + +template +struct TESTOUTER +{ + static void all() + { + testouter(); + } +}; + +template +struct TESTMATVEC +{ + static void all() + { + testmatvec(); + } +}; + +template +struct TESTCOMPLEXMATVEC +{ + static void all() + { + testcomplexmatvec(); + } +}; 
+ +template +struct TESTADD +{ + static void all() + { + testadd (); + } +}; + +template +struct TESTMATTRANSPOSE +{ + static void all() + { + testmattranspose(); + } +}; + +template +struct TESTMATMULT +{ + static void all() + { + testmatmult(); + } +}; + +template +struct TESTSUBMATMULT +{ + static void all() + { + testsubmatmult(); + } +}; + + +template +struct TEST_CASES +{ + static void all() + { + (mp_push_front::all(),...); + } +}; + +template