diff --git a/software/.gitignore b/software/.gitignore
index 49abad0af..db146be92 100644
--- a/software/.gitignore
+++ b/software/.gitignore
@@ -26,4 +26,4 @@ runtime/arch.ld
 
 # Generated data files
 data.h
-data/data*.h
+apps/*/data*.h
diff --git a/software/apps/Makefile b/software/apps/Makefile
index 1f1f1a782..2bbb19857 100644
--- a/software/apps/Makefile
+++ b/software/apps/Makefile
@@ -18,7 +18,6 @@ include $(RUNTIME_DIR)/runtime.mk
 
 APPS := $(patsubst $(APPS_DIR)/%/main.c,%,$(shell find $(APPS_DIR) -name "main.c"))
 DATA := $(patsubst %.args,%.h,$(shell find $(APPS_DIR) -name "data.args"))
-ALLPYS := $(patsubst %.py,%.h,$(wildcard $(DATA_DIR)/*.py))
 BINARIES := $(addprefix $(BIN_DIR)/,$(APPS))
 ifeq ($(config), systolic)
 	ALL := $(APPS)
@@ -37,7 +36,7 @@ all_llvm: $(ALL_LLVM)
 $(APPS): % : $(BIN_DIR)/% $(APPS_DIR)/Makefile $(shell find $(RUNTIME_DIR)/**.{S,c,h,ld} -type f)
 
 .PHONY: $(BINARIES)
-$(BINARIES): $(BIN_DIR)/%: %/main.c.o $(RUNTIME) $(LINKER_SCRIPT) $(DATA) $(ALLPYS) update_opcodes
+$(BINARIES): $(BIN_DIR)/%: %/main.c.o $(RUNTIME) $(LINKER_SCRIPT) $(DATA) update_opcodes
 	mkdir -p $(dir $@)
 	$(RISCV_CC) -Iinclude $(RISCV_LDFLAGS) -o $@ $< $(RUNTIME) -T$(RUNTIME_DIR)/link.ld
 	$(RISCV_OBJDUMP) $(RISCV_OBJDUMP_FLAGS) -D $@ > $@.dump
diff --git a/software/apps/cfft_radix2_q16/data.args b/software/apps/cfft_radix2_q16/data.args
new file mode 100644
index 000000000..a7d20d682
--- /dev/null
+++ b/software/apps/cfft_radix2_q16/data.args
@@ -0,0 +1 @@
+LEN 64
diff --git a/software/apps/cfft_radix4_q16/data.args b/software/apps/cfft_radix4_q16/data.args
new file mode 100644
index 000000000..a7d20d682
--- /dev/null
+++ b/software/apps/cfft_radix4_q16/data.args
@@ -0,0 +1 @@
+LEN 64
diff --git a/software/apps/chest_q16/data.args b/software/apps/chest_q16/data.args
new file mode 100644
index 000000000..18e1eafab
--- /dev/null
+++ b/software/apps/chest_q16/data.args
@@ -0,0 +1,3 @@
+N_TX 4
+N_RX 32
+N_SAMPLES 256
diff --git a/software/data/data_cfft_radix2_q16.h.tpl b/software/data/data_cfft_radix2_q16.h.tpl
deleted file mode 100644
index 35ae84005..000000000
--- a/software/data/data_cfft_radix2_q16.h.tpl
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright 2022 ETH Zurich and University of Bologna.
-// Licensed under the Apache License, Version 2.0, see LICENSE for details.
-// SPDX-License-Identifier: Apache-2.0
-
-// Automatically generated by:
-// data/data_cfft_radix2_q16.py
-
-\
-<% def array_to_cstr(array):
-    out = '{'
-    i = 0
-    out += '\n'
-    for a in array:
-        out += '(int16_t) 0X{:04X}, '.format(a&0xffff)
-        i += 1
-        if i % 16 == 0:
-            out += '\n'
-    out = out[:-2] + '}'
-    return out
-%> \
-
-<% def array_to_str(array):
-    out = '{'
-    i = 0
-    out += '\n'
-    for a in array:
-        out += '{}, '.format(a)
-        i += 1
-        if i % 16 == 0:
-            out += '\n'
-    out = out[:-2] + '}'
-    return out
-%> \
-
-#define LOG2 (${Log2Len})
-#define N_CSAMPLES (${Len})
-#define N_RSAMPLES (2 * N_CSAMPLES)
-#define N_TWIDDLES (3 * N_CSAMPLES / 4)
-#define N_BANKS (NUM_CORES * BANKING_FACTOR)
-#define BITREVINDEXTABLE_LENGTH (${BitrevLen})
-
-// Tolerance for correctness check
-#define TOLERANCE (${tolerance})
-
-% for m, m_str in zip([vector_inp, vector_res], ['l2_pSrc', 'l2_pRes']):
-
-// Data arrays for matrix ${m_str}
-int16_t ${m_str}[${2*Len}] = ${array_to_cstr(m)};
-
-% endfor \
-
-// Twiddles
-int16_t l2_twiddleCoef_q16[${int(6*Len/4)}] = ${array_to_cstr(vector_twi)};
-
-// Bitreversal
-uint16_t l2_BitRevIndexTable[${BitrevLen}] = ${array_to_str(vector_bitrev)};
diff --git a/software/data/data_cfft_radix2_q16.py b/software/data/data_cfft_radix2_q16.py
deleted file mode 100644
index e1615e53e..000000000
--- a/software/data/data_cfft_radix2_q16.py
+++ /dev/null
@@ -1,200 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright 2022 ETH Zurich and University of Bologna.
-# Solderpad Hardware License, Version 0.51, see LICENSE for details.
-# SPDX-License-Identifier: SHL-0.51
-
-# This script generates data for the cfft kernel.
-# Author: Marco Bertuletti <mbertuletti@iis.ee.ethz.ch>
-
-import numpy as np
-import math as M
-import argparse
-import pathlib
-from mako.template import Template
-from sympy.combinatorics import Permutation
-
-
-##################
-# compute_result #
-##################
-
-
-def compute_result(inp, len):
-    """
-    Funciton to generate the expected result of the testcase.
-
-    Arguments
-    ---------
-    input: numpy array of inputs
-    env: Length of the input transform.
-    """
-
-    # Q16:
-    # len=16:    Q1.15 -> Q5.11
-    # len=32:    Q1.15 -> Q6.10
-    # len=64:    Q1.15 -> Q7.9
-    # len=128:   Q1.15 -> Q8.8
-    # len=256:   Q1.15 -> Q9.7
-    # len=512:   Q1.15 -> Q10.6
-    # len=1024:  Q1.15 -> Q11.5
-    # len=2048:  Q1.15 -> Q12.4
-    # len=4096:  Q1.15 -> Q13.3
-    bit_shift_dict_q16 = {
-        16: 11,
-        32: 10,
-        64: 9,
-        128: 8,
-        256: 7,
-        512: 6,
-        1024: 5,
-        2048: 4,
-        4096: 3}
-    my_type = np.int16
-    my_fixpoint = 15
-    bit_shift_dict = bit_shift_dict_q16
-    a = inp.astype(my_type)
-    result = np.zeros(a.size, dtype=my_type)
-    complex_a = np.zeros(int(a.size / 2), dtype=np.csingle)
-    complex_result = np.zeros(a.size >> 1, dtype=np.csingle)
-    for i in range(a.size >> 1):
-        complex_a[i] = a[2 * i].astype(np.csingle) / (2**(my_fixpoint)) + (
-            a[2 * i + 1].astype(np.csingle) / (2**(my_fixpoint))) * 1j
-    complex_result = np.fft.fft(complex_a)
-    for i in range(int(a.size / 2)):
-        result[2 * i] = (np.real(complex_result[i]) *
-                         (2**(bit_shift_dict[int(a.size / 2)]))
-                         ).astype(my_type)
-        result[2 * i + 1] = (np.imag(complex_result[i]) *
-                             (2**(bit_shift_dict[int(a.size / 2)]))
-                             ).astype(my_type)
-
-    return result
-
-
-def compute_twiddles(length):
-    PI = 3.14159265358979
-    N = length
-    twiddleCoefq15 = np.zeros((int)(2 * 3 * N / 4), np.int16)
-    for i in range(0, (int)(3 * N / 4)):
-        twiddleCoefq15_cos = M.cos(i * 2 * PI / N)
-        twiddleCoefq15_sin = M.sin(i * 2 * PI / N)
-        twiddleCoefq15[2 * i] = int(round(twiddleCoefq15_cos * (2**15 - 1)))
-        twiddleCoefq15[2 * i +
-                       1] = int(round(twiddleCoefq15_sin * (2**15 - 1)))
-    return twiddleCoefq15
-
-
-def compute_bitreversal(N, R):
-
-    # Decompose
-    logR2 = []
-    idx = N
-    while (idx >= R):
-        logR2.append(int(M.log2(R)))
-        idx = idx // R
-    if (idx > 1):
-        logR2.append(int(M.log2(idx)))
-
-    # Bitreversal
-    indexes = []
-    for x in range(N):
-        result = 0
-        for bits in logR2:
-            mask = (0xffffffff >> (32 - bits))
-            result = (result << bits) | (x & mask)
-            x = x >> bits
-        indexes.append(result)
-
-    # Create transpositions table
-    tps = []
-    for c in Permutation.from_sequence(indexes).cyclic_form:
-        for i in range(len(c) - 1):
-            tps.append([c[i] * 8, c[-1] * 8])
-
-    return tps
-
-
-def gen_data_header_file(
-        outdir: pathlib.Path.cwd(),
-        tpl: pathlib.Path.cwd(),
-        **kwargs):
-
-    file = outdir / f"{kwargs['name']}.h"
-
-    print(tpl, outdir, kwargs['name'])
-
-    template = Template(filename=str(tpl))
-    with file.open('w') as f:
-        f.write(template.render(**kwargs))
-
-
-def main():
-
-    parser = argparse.ArgumentParser(description='Generate data for kernels')
-    parser.add_argument(
-        "-o",
-        "--outdir",
-        type=pathlib.Path,
-        default=pathlib.Path(__file__).parent.absolute(),
-        required=False,
-        help='Select out directory of generated data files'
-    )
-    parser.add_argument(
-        "-t",
-        "--tpl",
-        type=pathlib.Path,
-        required=False,
-        default=pathlib.Path(__file__).parent.absolute() /
-        "data_cfft_radix2_q16.h.tpl",
-        help='Path to mako template')
-    parser.add_argument(
-        "-v",
-        "--verbose",
-        action='store_true',
-        help='Set verbose'
-    )
-    parser.add_argument(
-        "-d",
-        "--dimension",
-        type=int,
-        required=False,
-        default=64,
-        help='Input dimension'
-    )
-
-    args = parser.parse_args()
-
-    # Create sparse matrix
-    Len = args.dimension
-    Input = np.random.randint(-2**(15), 2**(15) - 1, 2 * Len, dtype=np.int16)
-    Result = compute_result(Input, Len)
-    Twiddles = compute_twiddles(Len)
-    Bitreversal = np.ndarray.flatten(np.array(compute_bitreversal(Len, 2)))
-
-    tolerance = {
-        16: 16,
-        32: 20,
-        64: 24,
-        128: 28,
-        256: 32,
-        512: 48,
-        1024: 64,
-        2048: 96,
-        4096: 128}
-
-    kwargs = {'name': 'data_cfft_radix2_q16',
-              'vector_inp': Input,
-              'vector_res': Result,
-              'vector_twi': Twiddles,
-              'vector_bitrev': Bitreversal,
-              'Len': Len,
-              'Log2Len': int(np.log2(Len)),
-              'BitrevLen': int(2 * len(Bitreversal)),
-              'tolerance': tolerance[int(Len)]}
-
-    gen_data_header_file(args.outdir, args.tpl, **kwargs)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/software/data/data_cfft_radix4_q16.h.tpl b/software/data/data_cfft_radix4_q16.h.tpl
deleted file mode 100644
index 1a147a30c..000000000
--- a/software/data/data_cfft_radix4_q16.h.tpl
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2022 ETH Zurich and University of Bologna.
-// Licensed under the Apache License, Version 2.0, see LICENSE for details.
-// SPDX-License-Identifier: Apache-2.0
-
-// Automatically generated by:
-// data/data_cfft_radix4_q16.py
-
-\
-<% def array_to_cstr(array):
-    out = '{'
-    i = 0
-    out += '\n'
-    for a in array:
-        out += '(int16_t) 0X{:04X}, '.format(a&0xffff)
-        i += 1
-        if i % 16 == 0:
-            out += '\n'
-    out = out[:-2] + '}'
-    return out
-%> \
-
-<% def array_to_str(array):
-    out = '{'
-    i = 0
-    out += '\n'
-    for a in array:
-        out += '{}, '.format(a)
-        i += 1
-        if i % 16 == 0:
-            out += '\n'
-    out = out[:-2] + '}'
-    return out
-%> \
-
-#define LOG2 (${Log2Len})
-#define N_CSAMPLES (${Len})
-#define N_RSAMPLES (2 * N_CSAMPLES)
-#define N_TWIDDLES (3 * N_CSAMPLES / 4)
-#define N_BANKS (NUM_CORES * BANKING_FACTOR)
-#define BITREVINDEXTABLE_LENGTH (${BitrevLen})
-
-// Maximum number of independent FFT columns allowed
-#define MAX_COL (N_BANKS / (N_CSAMPLES / 4))
-// Tolerance for correctness check
-#define TOLERANCE (${tolerance})
-
-% for m, m_str in zip([vector_inp, vector_res], ['l2_pSrc', 'l2_pRes']):
-
-// Data arrays for matrix ${m_str}
-int16_t ${m_str}[${2*Len}] = ${array_to_cstr(m)};
-
-% endfor \
-
-// Twiddles
-int16_t l2_twiddleCoef_q16[${int(6*Len/4)}] = ${array_to_cstr(vector_twi)};
-
-// Bitreversal
-uint16_t l2_BitRevIndexTable[${BitrevLen}] = ${array_to_str(vector_bitrev)};
diff --git a/software/data/data_cfft_radix4_q16.py b/software/data/data_cfft_radix4_q16.py
deleted file mode 100755
index b394a2884..000000000
--- a/software/data/data_cfft_radix4_q16.py
+++ /dev/null
@@ -1,200 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright 2022 ETH Zurich and University of Bologna.
-# Solderpad Hardware License, Version 0.51, see LICENSE for details.
-# SPDX-License-Identifier: SHL-0.51
-
-# This script generates data for the cfft kernel.
-# Author: Marco Bertuletti <mbertuletti@iis.ee.ethz.ch>
-
-import numpy as np
-import math as M
-import argparse
-import pathlib
-from mako.template import Template
-from sympy.combinatorics import Permutation
-
-
-##################
-# compute_result #
-##################
-
-
-def compute_result(inp, len):
-    """
-    Funciton to generate the expected result of the testcase.
-
-    Arguments
-    ---------
-    input: numpy array of inputs
-    env: Length of the input transform.
-    """
-
-    # Q16:
-    # len=16:    Q1.15 -> Q5.11
-    # len=32:    Q1.15 -> Q6.10
-    # len=64:    Q1.15 -> Q7.9
-    # len=128:   Q1.15 -> Q8.8
-    # len=256:   Q1.15 -> Q9.7
-    # len=512:   Q1.15 -> Q10.6
-    # len=1024:  Q1.15 -> Q11.5
-    # len=2048:  Q1.15 -> Q12.4
-    # len=4096:  Q1.15 -> Q13.3
-    bit_shift_dict_q16 = {
-        16: 11,
-        32: 10,
-        64: 9,
-        128: 8,
-        256: 7,
-        512: 6,
-        1024: 5,
-        2048: 4,
-        4096: 3}
-    my_type = np.int16
-    my_fixpoint = 15
-    bit_shift_dict = bit_shift_dict_q16
-    a = inp.astype(my_type)
-    result = np.zeros(a.size, dtype=my_type)
-    complex_a = np.zeros(int(a.size / 2), dtype=np.csingle)
-    complex_result = np.zeros(a.size >> 1, dtype=np.csingle)
-    for i in range(a.size >> 1):
-        complex_a[i] = a[2 * i].astype(np.csingle) / (2**(my_fixpoint)) + (
-            a[2 * i + 1].astype(np.csingle) / (2**(my_fixpoint))) * 1j
-    complex_result = np.fft.fft(complex_a)
-    for i in range(int(a.size / 2)):
-        result[2 * i] = (np.real(complex_result[i]) *
-                         (2**(bit_shift_dict[int(a.size / 2)]))
-                         ).astype(my_type)
-        result[2 * i + 1] = (np.imag(complex_result[i]) *
-                             (2**(bit_shift_dict[int(a.size / 2)]))
-                             ).astype(my_type)
-
-    return result
-
-
-def compute_twiddles(length):
-    PI = 3.14159265358979
-    N = length
-    twiddleCoefq15 = np.zeros((int)(2 * 3 * N / 4), np.int16)
-    for i in range(0, (int)(3 * N / 4)):
-        twiddleCoefq15_cos = M.cos(i * 2 * PI / N)
-        twiddleCoefq15_sin = M.sin(i * 2 * PI / N)
-        twiddleCoefq15[2 * i] = int(round(twiddleCoefq15_cos * (2**15 - 1)))
-        twiddleCoefq15[2 * i +
-                       1] = int(round(twiddleCoefq15_sin * (2**15 - 1)))
-    return twiddleCoefq15
-
-
-def compute_bitreversal(N, R):
-
-    # Decompose
-    logR2 = []
-    idx = N
-    while (idx >= R):
-        logR2.append(int(M.log2(R)))
-        idx = idx // R
-    if (idx > 1):
-        logR2.append(int(M.log2(idx)))
-
-    # Bitreversal
-    indexes = []
-    for x in range(N):
-        result = 0
-        for bits in logR2:
-            mask = (0xffffffff >> (32 - bits))
-            result = (result << bits) | (x & mask)
-            x = x >> bits
-        indexes.append(result)
-
-    # Create transpositions table
-    tps = []
-    for c in Permutation.from_sequence(indexes).cyclic_form:
-        for i in range(len(c) - 1):
-            tps.append([c[i] * 8, c[-1] * 8])
-
-    return tps
-
-
-def gen_data_header_file(
-        outdir: pathlib.Path.cwd(),
-        tpl: pathlib.Path.cwd(),
-        **kwargs):
-
-    file = outdir / f"{kwargs['name']}.h"
-
-    print(tpl, outdir, kwargs['name'])
-
-    template = Template(filename=str(tpl))
-    with file.open('w') as f:
-        f.write(template.render(**kwargs))
-
-
-def main():
-
-    parser = argparse.ArgumentParser(description='Generate data for kernels')
-    parser.add_argument(
-        "-o",
-        "--outdir",
-        type=pathlib.Path,
-        default=pathlib.Path(__file__).parent.absolute(),
-        required=False,
-        help='Select out directory of generated data files'
-    )
-    parser.add_argument(
-        "-t",
-        "--tpl",
-        type=pathlib.Path,
-        required=False,
-        default=pathlib.Path(__file__).parent.absolute() /
-        "data_cfft_radix4_q16.h.tpl",
-        help='Path to mako template')
-    parser.add_argument(
-        "-v",
-        "--verbose",
-        action='store_true',
-        help='Set verbose'
-    )
-    parser.add_argument(
-        "-d",
-        "--dimension",
-        type=int,
-        required=False,
-        default=64,
-        help='Input dimension'
-    )
-
-    args = parser.parse_args()
-
-    # Create sparse matrix
-    Len = args.dimension
-    Input = np.random.randint(-2**(15), 2**(15) - 1, 2 * Len, dtype=np.int16)
-    Result = compute_result(Input, Len)
-    Twiddles = compute_twiddles(Len)
-    Bitreversal = np.ndarray.flatten(np.array(compute_bitreversal(Len, 2)))
-
-    tolerance = {
-        16: 16,
-        32: 20,
-        64: 24,
-        128: 28,
-        256: 32,
-        512: 48,
-        1024: 64,
-        2048: 96,
-        4096: 128}
-
-    kwargs = {'name': 'data_cfft_radix4_q16',
-              'vector_inp': Input,
-              'vector_res': Result,
-              'vector_twi': Twiddles,
-              'vector_bitrev': Bitreversal,
-              'Len': Len,
-              'Log2Len': int(np.log2(Len)),
-              'BitrevLen': len(Bitreversal),
-              'tolerance': tolerance[int(Len)]}
-
-    gen_data_header_file(args.outdir, args.tpl, **kwargs)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/software/data/data_chest_q16.h.tpl b/software/data/data_chest_q16.h.tpl
deleted file mode 100644
index 3384a004e..000000000
--- a/software/data/data_chest_q16.h.tpl
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2022 ETH Zurich and University of Bologna.
-// Licensed under the Apache License, Version 2.0, see LICENSE for details.
-// SPDX-License-Identifier: Apache-2.0
-
-// Automatically generated by:
-// data/data_chest_q16.py
-
-\
-<% def array_to_cstr(array):
-    out = '{'
-    i = 0
-    out += '\n'
-    for a in array:
-        out += '{}, '.format(a)
-        i += 1
-        if i % 32 == 0:
-            out += '\n'
-    out = out[:-2] + '}'
-    return out
-%> \
-
-#define N_TX (${nb_tx})
-#define N_RX (${nb_rx})
-#define N_SAMPLES (${nb_samples})
-
-int16_t PilotRX[${2*nb_rx*nb_samples}] = ${array_to_cstr(pilot_rx)};
-
-int16_t PilotTX[${2*nb_tx*nb_samples}] = ${array_to_cstr(pilot_tx)};
-
-int16_t HEST[${2*nb_rx*nb_tx*nb_samples}] = ${array_to_cstr(Hest)};
diff --git a/software/data/data_chest_q16.py b/software/data/data_chest_q16.py
deleted file mode 100755
index 0719dd251..000000000
--- a/software/data/data_chest_q16.py
+++ /dev/null
@@ -1,198 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright 2022 ETH Zurich and University of Bologna.
-# Solderpad Hardware License, Version 0.51, see LICENSE for details.
-# SPDX-License-Identifier: SHL-0.51
-
-# This script generates data for the Channel estimation.
-# Author: Marco Bertuletti <mbertuletti@iis.ee.ethz.ch>
-
-import numpy as np
-import argparse
-import pathlib
-
-from mako.template import Template
-
-##################
-#  write_result  #
-##################
-
-
-def gen_data_header_file(
-        outdir: pathlib.Path.cwd(),
-        tpl: pathlib.Path.cwd(),
-        **kwargs):
-
-    file = outdir / f"{kwargs['name']}.h"
-
-    print(tpl, outdir, kwargs['name'])
-
-    template = Template(filename=str(tpl))
-    with file.open('w') as f:
-        f.write(template.render(**kwargs))
-
-######################
-# Fixpoint Functions #
-######################
-
-
-def q_sat(x):
-    if x > 2**15 - 1:
-        return x - 2**16
-    elif x < -2**15:
-        return x + 2**16
-    else:
-        return x
-
-
-def q_add(a, b):
-    return q_sat(a + b)
-
-
-def q_sub(a, b):
-    return q_sat(a - b)
-
-
-def q_mul(a, b, p):
-    return a * b
-    # return q_roundnorm(a * b, p)
-
-
-def q_div(a, b, p):
-    return a / b
-
-
-def q_roundnorm(a, p):
-    rounding = 1 << (p - 1)
-    return q_sat((a + rounding) >> p)
-
-
-def main():
-
-    parser = argparse.ArgumentParser(description='Generate data for kernels')
-    parser.add_argument(
-        "-o",
-        "--outdir",
-        type=pathlib.Path,
-        default=pathlib.Path(__file__).parent.absolute(),
-        required=False,
-        help='Select out directory of generated data files'
-    )
-    parser.add_argument(
-        "-t",
-        "--tpl",
-        type=pathlib.Path,
-        required=False,
-        default=pathlib.Path(__file__).parent.absolute() /
-        "data_chest_q16.h.tpl",
-        help='Path to mako template')
-    parser.add_argument(
-        "-v",
-        "--verbose",
-        action='store_true',
-        help='Set verbose'
-    )
-    parser.add_argument(
-        "-b",
-        "--num_beams",
-        type=int,
-        required=False,
-        default=4,
-        help='Number beams'
-    )
-    parser.add_argument(
-        "-l",
-        "--num_layers",
-        type=int,
-        required=False,
-        default=4,
-        help='Number layers'
-    )
-    parser.add_argument(
-        "-s",
-        "--num_samples",
-        type=int,
-        required=False,
-        default=32,
-        help='Number samples'
-    )
-
-    # Generate the channel mean value
-    def random_SDP_matrix(n):
-        G = np.random.randn(n, n)
-        np.dot(G, G.T, G)
-        return G / np.trace(G)
-
-    # Convert to fixed point
-    def fixed_point_conversion(v_input, fixed_point, scaling_factor):
-        real = (np.multiply(v_input.real,
-                            scaling_factor * 2**(np.log2(fixed_point))))
-        imag = (np.multiply(v_input.imag,
-                            scaling_factor * 2**(np.log2(fixed_point))))
-        output = np.zeros(len(real) * 2)
-        for i in range(0, len(real)):
-            output[2 * i] = real[i]
-            output[2 * i + 1] = imag[i]
-        return output.astype(np.int16)
-
-    # Compute the channel estimate
-    def compute_result(in_rx, in_tx, p):
-        my_type = np.int16
-        a = in_rx.astype(my_type)
-        b = in_tx.astype(my_type)
-        n_rx = a.size >> 1
-        n_tx = b.size >> 1
-        result = np.zeros(2 * (n_tx * n_rx), dtype=my_type)
-        for i in range(n_rx):
-            a_r = a[2 * i]
-            a_i = a[2 * i + 1]
-            for j in range(n_tx):
-                b_r = b[2 * j]
-                b_i = b[2 * j + 1]
-                den = q_mul(b_r, b_r, p) + q_mul(b_i, b_i, p)
-                num_r = q_add(q_mul(a_r, b_r, p), q_mul(a_i, b_i, p))
-                num_i = q_sub(q_mul(a_i, b_r, p), q_mul(a_r, b_i, p))
-                result[2 * (i * n_tx + j)] = q_div(num_r, den, p)
-                result[2 * (i * n_tx + j) + 1] = q_div(num_i, den, p)
-        return result
-
-    args = parser.parse_args()
-    nb_tx = args.num_beams
-    nb_rx = args.num_layers
-    nb_samples = args.num_samples
-
-    H = np.random.randn(nb_rx, nb_tx) + 1j * np.random.randn(nb_rx, nb_tx)
-
-    qvector_pilot_tx = []
-    qvector_pilot_rx = []
-    qvector_Hest = []
-    for k in range(nb_samples):
-        pilot_tx = 1 * np.exp(1j * np.random.randn(nb_tx))
-        pilot_rx = np.dot(H, pilot_tx)
-        fixed_point = 12
-        scaling_factor = 1
-        q_pilot_tx = fixed_point_conversion(
-            np.reshape(pilot_tx, [nb_tx]), fixed_point, 1)
-        q_pilot_rx = fixed_point_conversion(
-            np.reshape(pilot_rx, [nb_rx]), fixed_point, scaling_factor)
-        q_Hest = compute_result(q_pilot_rx, q_pilot_tx, fixed_point)
-        qvector_pilot_tx.append(q_pilot_tx)
-        qvector_pilot_rx.append(q_pilot_rx)
-        qvector_Hest.append(q_Hest)
-
-    qvector_pilot_tx = np.reshape(qvector_pilot_tx, [2 * nb_tx * nb_samples])
-    qvector_pilot_rx = np.reshape(qvector_pilot_rx, [2 * nb_rx * nb_samples])
-    qvector_Hest = np.reshape(qvector_Hest, [2 * nb_tx * nb_rx * nb_samples])
-
-    kwargs = {'name': 'data_chest_q16', 'pilot_tx': qvector_pilot_tx,
-              'pilot_rx': qvector_pilot_rx,
-              'Hest': qvector_Hest,
-              'nb_tx': nb_tx,
-              'nb_rx': nb_rx,
-              'nb_samples': nb_samples}
-
-    gen_data_header_file(args.outdir, args.tpl, **kwargs)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/software/data/generate_cfft.py b/software/data/generate_cfft.py
new file mode 100644
index 000000000..ad16128d2
--- /dev/null
+++ b/software/data/generate_cfft.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+
+# Copyright 2022 ETH Zurich and University of Bologna.
+# Solderpad Hardware License, Version 0.51, see LICENSE for details.
+# SPDX-License-Identifier: SHL-0.51
+
+# This script generates data for the cfft kernel.
+# Author: Marco Bertuletti <mbertuletti@iis.ee.ethz.ch>
+
+import numpy as np
+import math as M
+import argparse
+import pathlib
+from sympy.combinatorics import Permutation
+
+def generate_cfft_q16(N):  # N: number of complex samples (FFT length)
+    # Q16 formats (input -> output) for each supported length:
+    # len=16:    Q1.15 -> Q5.11
+    # len=32:    Q1.15 -> Q6.10
+    # len=64:    Q1.15 -> Q7.9
+    # len=128:   Q1.15 -> Q8.8
+    # len=256:   Q1.15 -> Q9.7
+    # len=512:   Q1.15 -> Q10.6
+    # len=1024:  Q1.15 -> Q11.5
+    # len=2048:  Q1.15 -> Q12.4
+    # len=4096:  Q1.15 -> Q13.3
+    src = (np.random.randint(-2**(15), 2**(15) - 1,  # upper bound exclusive
+           2 * N, dtype=np.int16)).astype(np.int16)  # interleaved re/im
+    tolerance = {  # per-length tolerance, returned as third value
+        16: 16,
+        32: 20,
+        64: 24,
+        128: 28,
+        256: 32,
+        512: 48,
+        1024: 64,
+        2048: 96,
+        4096: 128}
+    bit_shift_dict_q16 = {  # fractional bits of the output format
+        16: 11,
+        32: 10,
+        64: 9,
+        128: 8,
+        256: 7,
+        512: 6,
+        1024: 5,
+        2048: 4,
+        4096: 3}
+    my_fixpoint = 15  # fractional bits of the Q1.15 input
+    dst = np.zeros(2 * N, dtype=np.int16)
+    complex_src = np.zeros(N, dtype=np.csingle)
+    complex_dst = np.zeros(N, dtype=np.csingle)  # rebound by fft() below
+    for i in range(N):  # de-interleave the input and scale by 2**-15
+        shift = 2**(my_fixpoint)
+        complex_src[i] = (src[2 * i].astype(np.csingle) / shift) + \
+            1j * (src[2 * i + 1].astype(np.csingle) / shift)
+    complex_dst = np.fft.fft(complex_src)
+    for i in range(N):  # rescale to the output format and re-interleave
+        shift = 2**(bit_shift_dict_q16[N])  # KeyError if N not in table
+        dst[2 * i] = (np.real(complex_dst[i]) * shift).astype(np.int16)
+        dst[2 * i + 1] = (np.imag(complex_dst[i]) * shift).astype(np.int16)
+    return src, dst, tolerance[N]
+
+def generate_twiddleCoefq15(N):  # N: FFT length
+    PI = 3.14159265358979  # local constant, used instead of math.pi
+    twiddleCoefq15 = np.zeros((int)(2 * 3 * N / 4), np.int16)  # (cos, sin)
+    for i in range(0, (int)(3 * N / 4)):  # 3N/4 twiddle factors
+        twiddleCoefq15_cos = M.cos(i * 2 * PI / N)
+        twiddleCoefq15_sin = M.sin(i * 2 * PI / N)
+        twiddleCoefq15[2 * i] = int(round(twiddleCoefq15_cos * (2**15 - 1)))
+        twiddleCoefq15[2 * i +
+                       1] = int(round(twiddleCoefq15_sin * (2**15 - 1)))
+    return twiddleCoefq15  # interleaved cos/sin scaled by 2**15 - 1
+
+def generate_bitreversal(N, R):  # N: length, R: radix
+    # Decompose N into per-stage digit widths (in bits) for radix R
+    logR2 = []
+    idx = N
+    while (idx >= R):
+        logR2.append(int(M.log2(R)))
+        idx = idx // R
+    if (idx > 1):  # leftover factor smaller than R
+        logR2.append(int(M.log2(idx)))
+    # Digit-reverse every index in 0..N-1 using those widths
+    indexes = []
+    for x in range(N):
+        result = 0
+        for bits in logR2:
+            mask = (0xffffffff >> (32 - bits))  # lowest `bits` bits set
+            result = (result << bits) | (x & mask)
+            x = x >> bits
+        indexes.append(result)
+    # Split each permutation cycle into swaps with its last element (x8)
+    tps = []
+    for c in Permutation.from_sequence(indexes).cyclic_form:
+        for i in range(len(c) - 1):
+            tps.append([c[i] * 8, c[-1] * 8])
+    return np.ndarray.flatten(np.array(tps))  # flat [a0, b0, a1, b1, ...]
+
+
+
diff --git a/software/data/generate_chest.py b/software/data/generate_chest.py
new file mode 100755
index 000000000..c19873f95
--- /dev/null
+++ b/software/data/generate_chest.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python3
+
+# Copyright 2022 ETH Zurich and University of Bologna.
+# Solderpad Hardware License, Version 0.51, see LICENSE for details.
+# SPDX-License-Identifier: SHL-0.51
+
+# This script generates data for the Channel estimation.
+# Author: Marco Bertuletti <mbertuletti@iis.ee.ethz.ch>
+
+import numpy as np
+import argparse
+import pathlib
+
+def q_sat(x):  # NOTE: wraps by 2**16 when out of range; does not clamp
+    if x > 2**15 - 1:
+        return x - 2**16
+    elif x < -2**15:
+        return x + 2**16
+    else:
+        return x  # already within the int16 range
+
+def q_add(a, b):  # sum, passed through q_sat
+    return q_sat(a + b)
+
+
+def q_sub(a, b):  # difference, passed through q_sat
+    return q_sat(a - b)
+
+
+def q_mul(a, b, p):  # p is unused: plain product, no renormalization
+    return a * b
+
+
+def q_div(a, b, p):  # p is unused: plain true division
+    return a / b
+
+# Scale complex v_input and convert it to interleaved int16 (re, im)
+def fixed_point_conversion(v_input, fixed_point, scaling_factor):
+    real = (np.multiply(v_input.real,  # gain: 2**log2(fp), i.e. fp itself
+                        scaling_factor * 2**(np.log2(fixed_point))))
+    imag = (np.multiply(v_input.imag,  # NOTE(review): 2**fp intended? confirm
+                        scaling_factor * 2**(np.log2(fixed_point))))
+    output = np.zeros(len(real) * 2)
+    for i in range(0, len(real)):  # interleave: even = real, odd = imag
+        output[2 * i] = real[i]
+        output[2 * i + 1] = imag[i]
+    return output.astype(np.int16)  # float -> int16 cast
+
+# Compute the channel estimate: a / b for every (rx, tx) pair of samples
+
+def compute_chest_q16(in_rx, in_tx, p):
+    my_type = np.int16
+    a = in_rx.astype(my_type)
+    b = in_tx.astype(my_type)
+    n_rx = a.size >> 1  # inputs are interleaved (re, im), hence size / 2
+    n_tx = b.size >> 1
+    result = np.zeros(2 * (n_tx * n_rx), dtype=my_type)
+    for i in range(n_rx):
+        a_r = a[2 * i]
+        a_i = a[2 * i + 1]
+        for j in range(n_tx):  # h = a * conj(b) / |b|**2 = a / b
+            b_r = b[2 * j]
+            b_i = b[2 * j + 1]
+            den = q_mul(b_r, b_r, p) + q_mul(b_i, b_i, p)  # |b|**2
+            num_r = q_add(q_mul(a_r, b_r, p), q_mul(a_i, b_i, p))
+            num_i = q_sub(q_mul(a_i, b_r, p), q_mul(a_r, b_i, p))
+            result[2 * (i * n_tx + j)] = q_div(num_r, den, p)  # cast to int16
+            result[2 * (i * n_tx + j) + 1] = q_div(num_i, den, p)
+    return result  # interleaved (re, im), rx-major then tx
+
+
+def generate_chest_q16(nb_tx, nb_rx, nb_samples):  # -> flat tx, rx, Hest
+    H = np.random.randn(nb_rx, nb_tx) + 1j * np.random.randn(nb_rx, nb_tx)
+    qvector_pilot_tx = []  # one int16 array appended per sample
+    qvector_pilot_rx = []
+    qvector_Hest = []
+    for k in range(nb_samples):  # H is the same for every sample
+        pilot_tx = 1 * np.exp(1j * np.random.randn(nb_tx))  # unit magnitude
+        pilot_rx = np.dot(H, pilot_tx)
+        fixed_point = 12
+        scaling_factor = 1
+        q_pilot_tx = fixed_point_conversion(
+            np.reshape(pilot_tx, [nb_tx]), fixed_point, 1)
+        q_pilot_rx = fixed_point_conversion(
+            np.reshape(
+                pilot_rx,
+                [nb_rx]),
+            fixed_point,
+            scaling_factor)
+        q_Hest = compute_chest_q16(q_pilot_rx, q_pilot_tx, fixed_point)
+        qvector_pilot_tx.append(q_pilot_tx)
+        qvector_pilot_rx.append(q_pilot_rx)
+        qvector_Hest.append(q_Hest)
+    qvector_pilot_tx = np.reshape(qvector_pilot_tx, [2 * nb_tx * nb_samples])
+    qvector_pilot_rx = np.reshape(qvector_pilot_rx, [2 * nb_rx * nb_samples])
+    qvector_Hest = np.reshape(qvector_Hest, [2 * nb_tx * nb_rx * nb_samples])
+    return qvector_pilot_tx, qvector_pilot_rx, qvector_Hest
diff --git a/software/data/print_header.py b/software/data/print_header.py
new file mode 100644
index 000000000..13b9877f4
--- /dev/null
+++ b/software/data/print_header.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+
+# Copyright 2022 ETH Zurich and University of Bologna.
+# Solderpad Hardware License, Version 0.51, see LICENSE for details.
+# SPDX-License-Identifier: SHL-0.51
+
+# This script generates data for the Channel estimation.
+# Author: Marco Bertuletti <mbertuletti@iis.ee.ethz.ch>
+
+import argparse
+import os
+import math
+from generate_cfft import *
+from generate_chest import *
+
+def extract_data_args(filename):
+    # Define a dictionary to store numerical values for each flag
+    args = {}
+
+    # Open the file for reading
+    with open(filename, 'r') as file:
+        # Iterate through each line in the file
+        for line in file:
+            # Split the line into words
+            words = line.split()
+            # Iterate through each word in the line
+            for i in range(len(words)):
+                flag = words[i]  # Get the flag name
+                # Check if the next word exists and is a numerical value
+                if i + 1 < len(words) and words[i + 1].isdigit():
+                    # Convert the numerical value to an integer
+                    numerical_value = int(words[i + 1])
+                    # Store the numerical value in the structure
+                    args[flag] = numerical_value
+
+    # Return the structure containing numerical values for each flag
+    return args
+
+class dot_dict:
+    def __init__(self, data):
+        self.data = data
+
+    def __getattr__(self, attr):
+        if attr in self.data:
+            return self.data[attr]
+        else:
+            raise AttributeError(f"Object has no attribute '{attr}'")
+
+def print_array(arr, typ, name, str):
+    count = 0
+    output_string = typ
+    output_string += " __attribute__((aligned(sizeof(int32_t)), section(\".l2\"))) "
+    output_string += name + '[{}] = {{\n'.format(arr.size)
+    for value in arr:
+        output_string += '(int16_t) 0X{:04X}, '.format(value&0xffff)
+        count += 1
+        if count % 8 == 0:
+            output_string += '\n'
+    output_string = output_string[:-3]
+    output_string += "};\n"
+    return output_string
+
+def print_file(string, filename):
+    with open(filename, "w") as file:
+        # Write the string to the file
+        file.write(string + '\n')
+    return file
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(description='Generate data.h header files.')
+    parser.add_argument('--params', type=str, help='Name of the app')
+    # Parse the command-line arguments
+    args = parser.parse_args()
+    params = args.params
+    # Read arguments from data.args file
+    data_args = extract_data_args(params)
+    (app_path, _) = os.path.split(params)
+    (_, app_name) = os.path.split(app_path)
+
+    if data_args != {}:
+        string =  "// Copyright 2022 ETH Zurich and University of Bologna.\n"
+        string += "// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n"
+        string += "// SPDX-License-Identifier: Apache-2.0\n\n"
+        string += "//File generated with .data/print_header.py\n"
+
+        data_args = dot_dict(data_args) #Access args with .notation
+
+        if app_name == "cfft_radix4_q16":
+            # cfft_radix4_q16
+            src_cfft_q16, dst_cfft_q16, tolerance_q16= generate_cfft_q16(data_args.LEN)
+            brv_cfft_q16 = generate_bitreversal(data_args.LEN, 4)
+            twi_cfft_q16 = generate_twiddleCoefq15(data_args.LEN)
+            string += "#define LOG2 ({})\n".format(np.int(math.log2(data_args.LEN)))
+            string += "#define N_CSAMPLES ({})\n".format(data_args.LEN)
+            string += "#define N_RSAMPLES ({})\n".format(2 * data_args.LEN)
+            string += "#define N_TWIDDLES ({})\n".format(3 * data_args.LEN // 4)
+            string += "#define BITREVINDEXTABLE_LENGTH ({})\n".format(len(brv_cfft_q16))
+            string += "#define TOLERANCE ({})\n".format(tolerance_q16)
+            string += "#define N_BANKS (NUM_CORES * BANKING_FACTOR)\n"
+            string += print_array(src_cfft_q16, "int16_t", "l2_pSrc", string)
+            string += print_array(dst_cfft_q16, "int16_t", "l2_pRes", string)
+            string += print_array(twi_cfft_q16, "int16_t", "l2_twiddleCoef_q16", string)
+            string += print_array(brv_cfft_q16, "int16_t", "l2_BitRevIndexTable", string)
+            filename = app_path + "/data_cfft_radix4_q16.h"
+
+        elif app_name == "cfft_radix2_q16":
+            # cfft_radix2_q16
+            src_cfft_q16, dst_cfft_q16, tolerance_q16= generate_cfft_q16(data_args.LEN)
+            brv_cfft_q16 = generate_bitreversal(data_args.LEN, 2)
+            twi_cfft_q16 = generate_twiddleCoefq15(data_args.LEN)
+            string += "#define LOG2 ({})\n".format(np.int(math.log2(data_args.LEN)))
+            string += "#define N_CSAMPLES ({})\n".format(data_args.LEN)
+            string += "#define N_RSAMPLES ({})\n".format(2 * data_args.LEN)
+            string += "#define N_TWIDDLES ({})\n".format(3 * data_args.LEN // 4)
+            string += "#define BITREVINDEXTABLE_LENGTH ({})\n".format(len(brv_cfft_q16))
+            string += "#define TOLERANCE ({})\n".format(tolerance_q16)
+            string += "#define N_BANKS (NUM_CORES * BANKING_FACTOR)\n"
+            string += print_array(src_cfft_q16, "int16_t", "l2_pSrc", string)
+            string += print_array(dst_cfft_q16, "int16_t", "l2_pRes", string)
+            string += print_array(twi_cfft_q16, "int16_t", "l2_twiddleCoef_q16", string)
+            string += print_array(brv_cfft_q16, "int16_t", "l2_BitRevIndexTable", string)
+            filename = app_path + "/data_cfft_radix2_q16.h"
+
+        elif app_name == "chest_q16":
+            src1_chest_q16, src2_chest_q16, dst_chest_q16 = generate_chest_q16(data_args.N_TX, data_args.N_RX, data_args.N_SAMPLES)
+            string += "#define N_TX {}\n".format(data_args.N_TX)
+            string += "#define N_RX {}\n".format(data_args.N_RX)
+            string += "#define N_SAMPLES {}\n".format(data_args.N_SAMPLES)
+            string += print_array(src1_chest_q16, "int16_t", "PilotRX", string)
+            string += print_array(src2_chest_q16, "int16_t", "PilotTX", string)
+            string += print_array(dst_chest_q16, "int16_t", "HEST", string)
+            filename = app_path + "/data_chest_q16.h"
+
+        else:
+            raise Exception("ERROR: no app with such name!!!")
+
+        print_file(string, filename)
diff --git a/software/runtime/runtime.mk b/software/runtime/runtime.mk
index 05606572f..48630dca4 100644
--- a/software/runtime/runtime.mk
+++ b/software/runtime/runtime.mk
@@ -140,10 +140,7 @@ RISCV_CCFLAGS_TESTS ?= $(RISCV_FLAGS_GCC) $(RISCV_FLAGS_COMMON_TESTS) -fvisibili
 	$(RISCV_CC) -P -E $(DEFINES) $< -o $@
 
 %.h: %.args
-	cat $< | xargs $(python) $(MEMPOOL_DIR)/scripts/gen_data.py --clangformat=$(LLVM_INSTALL_DIR)/bin/clang-format -o $@
-
-%.h: %.py
-	$(python) $<
+	$(python) $(MEMPOOL_DIR)/software/data/print_header.py --params $<
 
 # Bootrom
 %.elf: %.S $(ROOT_DIR)/bootrom.ld $(LINKER_SCRIPT)