Skip to content

Commit

Permalink
[software] Add OFDM application
Browse files Browse the repository at this point in the history
  • Loading branch information
mbertuletti committed Dec 15, 2023
1 parent 14ede34 commit 734cd31
Show file tree
Hide file tree
Showing 10 changed files with 1,135 additions and 801 deletions.
123 changes: 123 additions & 0 deletions software/apps/ofdm/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// Copyright 2022 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// Author: Marco Bertuletti, ETH Zurich

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Mempool runtime libraries */
#include "dma.h"
#include "encoding.h"
#include "printf.h"
#include "runtime.h"
#include "synchronization.h"
#include "xpulp/builtins_v2.h"

#include "data/data_ofdm.h"

// CFFT Parameters
// Configuration switches defined ahead of the kernel headers included below.
// NOTE(review): presumably these select the scheduled / folded-twiddle /
// bit-reversal-table / assembly code paths inside those headers - confirm.
#define SCHEDULED
#define FOLDED_TWIDDLES
#define BITREVERSETABLE
#define ASM
// Number of times main() replicates the twiddle factors in L1.
#define N_FFTs_COL 4
// Scales the size of the L1 FFT source/destination buffers.
#define N_FFTs_ROW (N_RX / N_FFTs_COL)
// CMATMUL Parameters
// Number of replicas of the beamforming coefficients kept in L1.
#define NUM_COPIES (N_BANKS / (N_BEAMS * N_RX))

// Only referenced by the currently commented-out iteration loop in main().
#define ROUNDS 3
// NOTE(review): presumably the runtime `dump` macro, generating the
// dump_prova() helper that main() calls after every phase - confirm.
dump(prova, 1);

#include "kernel/mempool_radix4_cfft_butterfly_f16.h"
#include "kernel/mempool_radix4_cfft_f16p.h"
#include "kernel/mempool_radix4_cfft_q16_bitreversal.h"
#include "kernel/mempool_cmatmul_f16.h"

// Shared state and working buffers, all placed in the ".l1_prio" section.

// Arrival counter for the final transfer phase: zeroed by core 0 during
// initialization, incremented atomically by every core, and reset by the
// last-arriving core once the transfer is done.
uint32_t arrival_index __attribute__((section(".l1_prio")));
// Holds NUM_COPIES replicas of the beamforming coefficients (l2_pBF_Coef);
// first operand of the beamforming matrix multiplication.
__fp16 l1_pBF_Coef_folded[2 * N_BEAMS * N_RX * NUM_COPIES]
__attribute__((aligned(4 * N_BANKS), section(".l1_prio")));

// FFT input buffer, filled from l2_pFFT_Src by DMA.
__fp16 l1_pFFT_Src[N_FFTs_ROW * 8 * N_BANKS]
__attribute__((aligned(4 * N_BANKS), section(".l1_prio")));
// FFT / beamforming destination buffer; copied back to l2_pBF_Dst at the end.
__fp16 l1_pFFT_Dst[N_FFTs_ROW * 8 * N_BANKS]
__attribute__((aligned(4 * N_BANKS), section(".l1_prio")));
// Twiddle factors loaded from l2_twiddleCoef_f16 (replicated N_FFTs_COL times).
__fp16 l1_twiddleCoef_f16_src[6 * N_BANKS]
__attribute__((aligned(4 * N_BANKS), section(".l1_prio")));
// Second twiddle buffer handed to the FFT scheduler alongside the source one.
__fp16 l1_twiddleCoef_f16_dst[6 * N_BANKS]
__attribute__((aligned(4 * N_BANKS), section(".l1_prio")));
// L1 copy of the bit-reversal index table (l2_BitRevIndexTable).
uint16_t l1_BitRevIndexTable[BITREVINDEXTABLE_LENGTH]
__attribute__((aligned(4 * N_BANKS), section(".l1_prio")));

///////////////////////////////////////////////////////////////////////////////////////////////////
/* MAIN */
/**
 * OFDM benchmark entry point; executed by every core.
 *
 * The program runs four phases, each bracketed by
 * mempool_start_benchmark()/mempool_stop_benchmark() and followed by a
 * dump_prova() marker carrying the phase number:
 *   0. core 0 loads inputs, bit-reversal table, beamforming coefficients and
 *      twiddles from L2 into L1 through the DMA;
 *   1. radix-4 complex FFT over the received samples;
 *   2. beamforming (folded complex matrix multiplication);
 *   3. the last core to arrive writes the result back to L2, reloads the
 *      inputs, and wakes up all cores.
 *
 * @return always 0.
 */
int main() {
  uint32_t core_id = mempool_get_core_id();
  uint32_t num_cores = mempool_get_core_count();
  mempool_barrier_init(core_id);

  /* INITIALIZATION */
  mempool_start_benchmark();
  if (core_id == 0) {
    // Each FFT is folded over 4 memory rows
    // Each memory row is 2 * N_BANKS samples
    __atomic_store_n(&arrival_index, 0, __ATOMIC_RELAXED);
    // One complex sample is two __fp16 values, i.e. sizeof(int32_t) bytes.
    dma_memcpy_blocking(l1_pFFT_Src, l2_pFFT_Src,
                        (N_RX * N_SC) * sizeof(int32_t));
    dma_memcpy_blocking(l1_BitRevIndexTable, l2_BitRevIndexTable,
                        BITREVINDEXTABLE_LENGTH * sizeof(uint16_t));
    for (uint32_t i = 0; i < NUM_COPIES; i++) {
      // The destination pointer is __fp16-typed, so consecutive replicas are
      // 2 * N_BEAMS * N_RX elements apart (real + imaginary part per
      // coefficient). A stride of N_BEAMS * N_RX would make them overlap.
      dma_memcpy_blocking(l1_pBF_Coef_folded + i * (2 * N_BEAMS * N_RX),
                          l2_pBF_Coef, (N_BEAMS * N_RX) * sizeof(int32_t));
    }
    for (uint32_t i = 0; i < N_FFTs_COL; i++) {
      dma_memcpy_blocking(l1_twiddleCoef_f16_src + (2 * i * N_BANKS),
                          l2_twiddleCoef_f16,
                          3 * (N_SC / 4) * sizeof(int32_t));
    }
  }
  mempool_barrier(num_cores);
  mempool_stop_benchmark();
  dump_prova(0);

  // // Start of the iterations
  // for (uint32_t round = 0; round < ROUNDS; round++) {

  /* FFT */
  mempool_start_benchmark();
  uint32_t col_fftLen = N_SC / 4;
  uint32_t col_id = core_id / (N_SC / 16);
  // Distribute FFTs over columns
  mempool_radix4_cfft_f16p_scheduler(
      l1_pFFT_Src, l1_pFFT_Dst, N_SC,
      l1_twiddleCoef_f16_src + 2 * col_id * col_fftLen,
      l1_twiddleCoef_f16_dst + 2 * col_id * col_fftLen, l1_BitRevIndexTable,
      BITREVINDEXTABLE_LENGTH, 1, (N_SC / 16));
  mempool_log_barrier(2, core_id);
  mempool_stop_benchmark();
  dump_prova(1);

  /* BEAMFORMING */
  mempool_start_benchmark();
  cmatmul_2x4_folded_f16p(l1_pBF_Coef_folded, l1_pFFT_Src, l1_pFFT_Dst,
                          N_BEAMS, N_RX, N_SC, core_id, num_cores);
  mempool_stop_benchmark();
  dump_prova(2);

  mempool_start_benchmark();
  // Transfer and synchronization
  // The core that observes (num_cores - 1) earlier arrivals is the last one:
  // it performs the transfers on behalf of everybody and then wakes all cores.
  if ((num_cores - 1) ==
      __atomic_fetch_add(&arrival_index, 1, __ATOMIC_RELAXED)) {
    dma_memcpy_blocking(l1_pFFT_Src, l2_pFFT_Src,
                        (N_RX * N_SC) * sizeof(int32_t));
    // The beamformed output is N_BEAMS x N_SC complex samples, which is also
    // the size l2_pBF_Dst is declared with; copying N_RX * N_SC samples would
    // write past its end whenever N_RX > N_BEAMS.
    dma_memcpy_blocking(l2_pBF_Dst, l1_pFFT_Dst,
                        (N_BEAMS * N_SC) * sizeof(int32_t));
    for (uint32_t i = 0; i < N_FFTs_COL; i++) {
      dma_memcpy_blocking(l1_twiddleCoef_f16_src + (2 * i * N_BANKS),
                          l2_twiddleCoef_f16,
                          3 * (N_SC / 4) * sizeof(int32_t));
    }
    __atomic_store_n(&arrival_index, 0, __ATOMIC_RELAXED);
    __sync_synchronize(); // Full memory barrier
    wake_up_all();
  }
  mempool_wfi();
  mempool_stop_benchmark();
  dump_prova(3);

  // }

  return 0;
}
48 changes: 48 additions & 0 deletions software/runtime/data/data_ofdm.h.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright 2022 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
\
<% def array_to_cstr(array):
    # Render the values as a brace-enclosed C initializer list of __fp16
    # literals (format precision 5), with a line break after every 8th value.
    pieces = ['{\n']
    for count, value in enumerate(array, start=1):
        pieces.append('(__fp16){:0.5}f, '.format(value))
        if count % 8 == 0:
            pieces.append('\n')
    # Drop the last two characters (normally the trailing separator) and close.
    return ''.join(pieces)[:-2] + '}'
%> \

<% def array_to_str(array):
    # Render the values as a brace-enclosed C initializer list using their
    # default string form, with a line break after every 16th value.
    pieces = ['{\n']
    for count, value in enumerate(array, start=1):
        pieces.append('{}, '.format(value))
        if count % 16 == 0:
            pieces.append('\n')
    # Drop the last two characters (normally the trailing separator) and close.
    return ''.join(pieces)[:-2] + '}'
%> \

// Problem dimensions substituted by the data generator.
// LOG2: log2 of the number of subcarriers.
#define LOG2 (${Log2Len})
// N_RX: number of receivers.
#define N_RX (${N_rx})
// N_BEAMS: number of beams.
#define N_BEAMS (${N_bs})
// N_SC: number of subcarriers.
#define N_SC (${N_sc})
// N_BANKS relies on NUM_CORES and BANKING_FACTOR being defined elsewhere.
#define N_BANKS (NUM_CORES * BANKING_FACTOR)
// Number of entries in the bit-reversal index table below.
#define BITREVINDEXTABLE_LENGTH (${BitrevLen})


// FFT input samples: 2 * N_SC * N_RX half-precision values.
__fp16 l2_pFFT_Src[${2 * N_sc * N_rx}] = ${array_to_cstr(pFFT_src)};

// Twiddle factors. NOTE(review): the array is declared with 2 * N_SC
// elements; any element without an initializer is zero - confirm the
// generator supplies as many values as the application reads.
__fp16 l2_twiddleCoef_f16[${2 * N_sc}] = ${array_to_cstr(pTw_coef)};

// Beamforming coefficients: 2 * N_BEAMS * N_RX half-precision values.
__fp16 l2_pBF_Coef[${2 * N_bs * N_rx}] = ${array_to_cstr(pBF_coef)};

// Beamforming output buffer: 2 * N_BEAMS * N_SC half-precision values.
__fp16 l2_pBF_Dst[${2 * N_bs * N_sc}] = ${array_to_cstr(pBF_dst)};

// Bitreversal
uint16_t l2_BitRevIndexTable[${BitrevLen}] = ${array_to_str(bitrev)};
133 changes: 133 additions & 0 deletions software/runtime/data/data_ofdm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Copyright 2022 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0

# Author: Marco Bertuletti, ETH Zurich

#!/usr/bin/env python3

import numpy as np
import math as M
import argparse
import pathlib
from mako.template import Template
from scipy.linalg import solve_triangular
from sympy.combinatorics import Permutation

##################
# compute_result #
##################

def compute_bitreversal(N, R):
    """Build the transposition table realising a digit-reversal of N indices.

    N is split into radix-R stages (plus one smaller final stage when a
    remainder larger than 1 is left). Every index in range(N) is
    digit-reversed stage by stage, the resulting permutation is decomposed
    into cycles, and every cycle is expressed as swaps against its last
    element. Both indices of each swap are multiplied by 8.

    Returns a list of [index_a, index_b] pairs.
    """
    # Bit width of every stage, in processing order.
    stage_bits = []
    remaining = N
    while remaining >= R:
        stage_bits.append(int(M.log2(R)))
        remaining //= R
    if remaining > 1:
        stage_bits.append(int(M.log2(remaining)))

    def _reverse_digits(value):
        # Peel the low digit off `value` and push it onto `reversed_value`.
        reversed_value = 0
        for width in stage_bits:
            low_digit = value & (0xffffffff >> (32 - width))
            reversed_value = (reversed_value << width) | low_digit
            value >>= width
        return reversed_value

    permuted = [_reverse_digits(index) for index in range(N)]

    # Express each cycle as transpositions with the cycle's last element.
    swaps = []
    for cycle in Permutation.from_sequence(permuted).cyclic_form:
        anchor = cycle[-1] * 8
        swaps.extend([member * 8, anchor] for member in cycle[:-1])
    return swaps

def gen_data_header_file(outdir: pathlib.Path, tpl: pathlib.Path, **kwargs) -> None:
    """Render the Mako template `tpl` into `outdir / data_<name>.h`.

    Args:
        outdir: Directory the header is written to.
        tpl: Path of the Mako template to render.
        **kwargs: Values handed to the template; must contain 'name', which
            also determines the output file name.
    """
    # The annotations used to be `pathlib.Path.cwd()` calls, which are
    # evaluated when the function is defined and are not types.
    file = outdir / f"data_{kwargs['name']}.h"

    print(tpl, outdir, kwargs['name'])

    template = Template(filename=str(tpl))
    with file.open('w') as f:
        f.write(template.render(**kwargs))

def main():
    """Generate random OFDM test data and render it into the data header.

    Command-line options select the output directory, the Mako template and
    the problem size (receivers, beams, subcarriers). The data arrays are
    drawn with numpy's global random generator and converted to float16.
    """
    script_dir = pathlib.Path(__file__).parent.absolute()

    parser = argparse.ArgumentParser(description='Generate data for kernels')
    parser.add_argument(
        "-o",
        "--outdir",
        type=pathlib.Path,
        default=script_dir,
        required=False,
        help='Select out directory of generated data files'
    )
    parser.add_argument(
        "-t",
        "--tpl",
        type=pathlib.Path,
        required=False,
        default=script_dir / "data_ofdm.h.tpl",
        help='Path to mako template'
    )
    # Accepted but not used anywhere in this function.
    parser.add_argument(
        "-v",
        "--verbose",
        action='store_true',
        help='Set verbose'
    )
    # The help texts below previously read 'First dimension.',
    # 'Second dimension.' and 'Iterations.', which did not describe the options.
    parser.add_argument(
        "-rx",
        "--receivers",
        type=int,
        required=False,
        default=64,
        help='Number of receivers (N_RX).'
    )
    parser.add_argument(
        "-bs",
        "--beams",
        type=int,
        required=False,
        default=32,
        help='Number of beams (N_BEAMS).'
    )
    parser.add_argument(
        "-sc",
        "--subcarriers",
        type=int,
        required=False,
        default=4096,
        help='Number of subcarriers (N_SC).'
    )

    args = parser.parse_args()
    N_rx = args.receivers
    N_bs = args.beams
    N_sc = args.subcarriers

    # Interleaved real/imaginary parts, hence the factor of 2.
    pFFT_src = np.random.rand(2 * N_rx * N_sc).astype(np.float16)
    # NOTE(review): only 3 * N_sc / 4 values are generated here, without the
    # factor of 2 used for the other arrays - confirm against what the
    # application copies from l2_twiddleCoef_f16.
    pTw_coef = np.random.rand(3 * N_sc // 4).astype(np.float16)
    pBF_coef = np.random.rand(2 * N_rx * N_bs).astype(np.float16)
    pBF_dst = np.random.rand(2 * N_bs * N_sc).astype(np.float16)

    Bitreversal = np.ndarray.flatten(np.array(compute_bitreversal(N_sc, 2)))

    kwargs = {'name': 'ofdm',
              'pFFT_src': pFFT_src,
              'pTw_coef': pTw_coef,
              'pBF_coef': pBF_coef,
              'pBF_dst': pBF_dst,
              'bitrev': Bitreversal,
              'N_rx': N_rx,
              'N_bs': N_bs,
              'N_sc': N_sc,
              'Log2Len': int(np.log2(N_sc)),
              'BitrevLen': len(Bitreversal)}
    gen_data_header_file(args.outdir, args.tpl, **kwargs)


if __name__ == "__main__":
    main()
Loading

0 comments on commit 734cd31

Please sign in to comment.