-
Notifications
You must be signed in to change notification settings - Fork 46
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
14ede34
commit 734cd31
Showing
10 changed files
with
1,135 additions
and
801 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
// Copyright 2022 ETH Zurich and University of Bologna. | ||
// Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
// Author: Marco Bertuletti, ETH Zurich | ||
|
||
#include <stdint.h> | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <string.h> | ||
|
||
/* Mempool runtime libraries */ | ||
#include "dma.h" | ||
#include "encoding.h" | ||
#include "printf.h" | ||
#include "runtime.h" | ||
#include "synchronization.h" | ||
#include "xpulp/builtins_v2.h" | ||
|
||
#include "data/data_ofdm.h" | ||
|
||
// CFFT Parameters | ||
#define SCHEDULED | ||
#define FOLDED_TWIDDLES | ||
#define BITREVERSETABLE | ||
#define ASM | ||
#define N_FFTs_COL 4 | ||
#define N_FFTs_ROW (N_RX / N_FFTs_COL) | ||
// CMATMUL Parameters | ||
#define NUM_COPIES (N_BANKS / (N_BEAMS * N_RX)) | ||
|
||
#define ROUNDS 3 | ||
dump(prova, 1); | ||
|
||
#include "kernel/mempool_radix4_cfft_butterfly_f16.h" | ||
#include "kernel/mempool_radix4_cfft_f16p.h" | ||
#include "kernel/mempool_radix4_cfft_q16_bitreversal.h" | ||
#include "kernel/mempool_cmatmul_f16.h" | ||
|
||
uint32_t arrival_index __attribute__((section(".l1_prio"))); | ||
__fp16 l1_pBF_Coef_folded[2 * N_BEAMS * N_RX * NUM_COPIES] | ||
__attribute__((aligned(4 * N_BANKS), section(".l1_prio"))); | ||
|
||
__fp16 l1_pFFT_Src[N_FFTs_ROW * 8 * N_BANKS] | ||
__attribute__((aligned(4 * N_BANKS), section(".l1_prio"))); | ||
__fp16 l1_pFFT_Dst[N_FFTs_ROW * 8 * N_BANKS] | ||
__attribute__((aligned(4 * N_BANKS), section(".l1_prio"))); | ||
__fp16 l1_twiddleCoef_f16_src[6 * N_BANKS] | ||
__attribute__((aligned(4 * N_BANKS), section(".l1_prio"))); | ||
__fp16 l1_twiddleCoef_f16_dst[6 * N_BANKS] | ||
__attribute__((aligned(4 * N_BANKS), section(".l1_prio"))); | ||
uint16_t l1_BitRevIndexTable[BITREVINDEXTABLE_LENGTH] | ||
__attribute__((aligned(4 * N_BANKS), section(".l1_prio"))); | ||
|
||
/////////////////////////////////////////////////////////////////////////////////////////////////// | ||
/* MAIN */ | ||
int main() { | ||
uint32_t core_id = mempool_get_core_id(); | ||
uint32_t num_cores = mempool_get_core_count(); | ||
mempool_barrier_init(core_id); | ||
|
||
|
||
/* INITIALIZATION */ | ||
mempool_start_benchmark(); | ||
if (core_id == 0) { | ||
// Each FFT is folded over 4 memory rows | ||
// Each memory row is 2 * N_BANKS samples | ||
__atomic_store_n(&arrival_index, 0, __ATOMIC_RELAXED); | ||
dma_memcpy_blocking(l1_pFFT_Src, l2_pFFT_Src, (N_RX * N_SC) * sizeof(int32_t)); | ||
dma_memcpy_blocking(l1_BitRevIndexTable, l2_BitRevIndexTable, BITREVINDEXTABLE_LENGTH * sizeof(int16_t)); | ||
for (uint32_t i = 0; i < NUM_COPIES; i++) { | ||
dma_memcpy_blocking(l1_pBF_Coef_folded + i * (N_BEAMS * N_RX), l2_pBF_Coef, (N_BEAMS * N_RX) * sizeof(int32_t)); | ||
} | ||
for (uint32_t i = 0; i < N_FFTs_COL; i++) { | ||
dma_memcpy_blocking(l1_twiddleCoef_f16_src + (2 * i * N_BANKS), l2_twiddleCoef_f16, 3 * (N_SC / 4) * sizeof(int32_t)); | ||
} | ||
} | ||
mempool_barrier(num_cores); | ||
mempool_stop_benchmark(); | ||
dump_prova(0); | ||
|
||
// // Start of the iterations | ||
// for (uint32_t round = 0; round < ROUNDS; round++) { | ||
|
||
/* FFT */ | ||
mempool_start_benchmark(); | ||
uint32_t col_fftLen = N_SC / 4; | ||
uint32_t col_id = core_id / (N_SC / 16); | ||
// Distribute FFTs over columns | ||
mempool_radix4_cfft_f16p_scheduler(l1_pFFT_Src, l1_pFFT_Dst, N_SC, | ||
l1_twiddleCoef_f16_src + 2 * col_id * col_fftLen, | ||
l1_twiddleCoef_f16_dst + 2 * col_id * col_fftLen, | ||
l1_BitRevIndexTable, BITREVINDEXTABLE_LENGTH, 1, (N_SC / 16)); | ||
mempool_log_barrier(2, core_id); | ||
mempool_stop_benchmark(); | ||
dump_prova(1); | ||
|
||
/* BEAMFORMING */ | ||
mempool_start_benchmark(); | ||
cmatmul_2x4_folded_f16p(l1_pBF_Coef_folded, l1_pFFT_Src, l1_pFFT_Dst, N_BEAMS, N_RX, N_SC, core_id, num_cores); | ||
mempool_stop_benchmark(); | ||
dump_prova(2); | ||
|
||
mempool_start_benchmark(); | ||
// Transfer and synchronization | ||
if ((num_cores - 1) == __atomic_fetch_add(&arrival_index, 1, __ATOMIC_RELAXED)) { | ||
dma_memcpy_blocking(l1_pFFT_Src, l2_pFFT_Src, (N_RX * N_SC) * sizeof(int32_t)); | ||
dma_memcpy_blocking(l2_pBF_Dst, l1_pFFT_Dst, (N_RX * N_SC) * sizeof(int32_t)); | ||
for (uint32_t i = 0; i < N_FFTs_COL; i++) { | ||
dma_memcpy_blocking(l1_twiddleCoef_f16_src + (2 * i * N_BANKS), l2_twiddleCoef_f16, 3 * (N_SC / 4) * sizeof(int32_t)); | ||
} | ||
__atomic_store_n(&arrival_index, 0, __ATOMIC_RELAXED); | ||
__sync_synchronize(); // Full memory barrier | ||
wake_up_all(); | ||
} | ||
mempool_wfi(); | ||
mempool_stop_benchmark(); | ||
dump_prova(3); | ||
|
||
// } | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
// Copyright 2022 ETH Zurich and University of Bologna. | ||
// Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
\ | ||
<% def array_to_cstr(array):
    # Format an iterable of numbers as a C initializer list of __fp16 casts,
    # eight values per line. Returns '{}' for an empty input.
    # Joining the values (instead of appending ', ' and slicing two characters
    # off the end) avoids a dangling ',' when the length is a multiple of 8.
    values = ['(__fp16){:0.5}f'.format(a) for a in array]
    if not values:
        return '{}'
    rows = [', '.join(values[k:k + 8]) for k in range(0, len(values), 8)]
    return '{\n' + ', \n'.join(rows) + '}'
%> \
|
||
<% def array_to_str(array):
    # Format an iterable as a C initializer list using each value's default
    # string form, sixteen values per line. Returns '{}' for an empty input.
    # Joining the values (instead of appending ', ' and slicing two characters
    # off the end) avoids a dangling ',' when the length is a multiple of 16.
    values = ['{}'.format(a) for a in array]
    if not values:
        return '{}'
    rows = [', '.join(values[k:k + 16]) for k in range(0, len(values), 16)]
    return '{\n' + ', \n'.join(rows) + '}'
%> \
|
||
## Problem dimensions, filled in from the render arguments.
#define LOG2 (${Log2Len})
#define N_RX (${N_rx})
#define N_BEAMS (${N_bs})
#define N_SC (${N_sc})
## NUM_CORES and BANKING_FACTOR are not defined here; they must already be
## defined wherever the generated header is included.
#define N_BANKS (NUM_CORES * BANKING_FACTOR)
#define BITREVINDEXTABLE_LENGTH (${BitrevLen})


## Data arrays; sizes are counted in fp16 halves.
__fp16 l2_pFFT_Src[${2 * N_rx * N_sc}] = ${array_to_cstr(pFFT_src)};

__fp16 l2_twiddleCoef_f16[${2 * N_sc}] = ${array_to_cstr(pTw_coef)};

__fp16 l2_pBF_Coef[${2 * N_rx * N_bs}] = ${array_to_cstr(pBF_coef)};

__fp16 l2_pBF_Dst[${2 * N_sc * N_bs}] = ${array_to_cstr(pBF_dst)};

// Bitreversal
uint16_t l2_BitRevIndexTable[${BitrevLen}] = ${array_to_str(bitrev)};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
# Copyright 2022 ETH Zurich and University of Bologna. | ||
# Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# Author: Marco Bertuletti, ETH Zurich | ||
|
||
#!/usr/bin/env python3 | ||
|
||
import numpy as np | ||
import math as M | ||
import argparse | ||
import pathlib | ||
from mako.template import Template | ||
from scipy.linalg import solve_triangular | ||
from sympy.combinatorics import Permutation | ||
|
||
################## | ||
# compute_result # | ||
################## | ||
|
||
def compute_bitreversal(N, R):
    """Build a table of index swaps that realises a digit-reversal of 0..N-1.

    N is split into base-R digits (plus one smaller leading digit when N is
    not an exact power of R); every index has its digits reversed, and the
    resulting permutation is decomposed into transpositions.

    Returns a list of ``[a * 8, b * 8]`` pairs, where ``b`` is the last
    element of a permutation cycle and ``a`` runs over the other elements of
    that cycle. The factor 8 is applied to both indices as-is (presumably a
    byte offset per sample -- confirm against the consumer of the table).
    """
    # Width in bits of each digit, least-significant digit first.
    digit_widths = []
    remaining = N
    while remaining >= R:
        digit_widths.append(int(M.log2(R)))
        remaining = remaining // R
    if remaining > 1:
        digit_widths.append(int(M.log2(remaining)))

    def reverse_digits(value):
        # Peel digits off the low end and push them onto the reversed value.
        reversed_value = 0
        for width in digit_widths:
            low_mask = 0xffffffff >> (32 - width)
            reversed_value = (reversed_value << width) | (value & low_mask)
            value = value >> width
        return reversed_value

    permuted = [reverse_digits(index) for index in range(N)]

    # Express every cycle as swaps against the cycle's last element.
    swaps = []
    for cycle in Permutation.from_sequence(permuted).cyclic_form:
        anchor = cycle[-1] * 8
        for member in cycle[:-1]:
            swaps.append([member * 8, anchor])
    return swaps
|
||
def gen_data_header_file(outdir: pathlib.Path, tpl: pathlib.Path, **kwargs):
    """Render the Mako template ``tpl`` and write it as a C header.

    The output file is ``outdir / "data_<name>.h"`` where ``<name>`` is
    ``kwargs['name']``. All keyword arguments are forwarded to the template.

    Args:
        outdir: Directory that receives the generated header.
        tpl: Path of the Mako template file.
        **kwargs: Template variables; must contain the key ``'name'``.

    Raises:
        KeyError: If ``'name'`` is missing from ``kwargs``.
    """
    # The annotations name the type; the previous `pathlib.Path.cwd()` form
    # called cwd() at definition time and annotated with a Path instance.
    file = outdir / f"data_{kwargs['name']}.h"

    print(tpl, outdir, kwargs['name'])

    template = Template(filename=str(tpl))
    with file.open('w') as f:
        f.write(template.render(**kwargs))
|
||
def main():
    """Parse the command line, generate random OFDM test data, and render
    the ``data_ofdm.h`` header from the Mako template."""

    parser = argparse.ArgumentParser(description='Generate data for kernels')
    parser.add_argument(
        "-o",
        "--outdir",
        type=pathlib.Path,
        default=pathlib.Path(__file__).parent.absolute(),
        required=False,
        help='Select out directory of generated data files'
    )
    parser.add_argument(
        "-t",
        "--tpl",
        type=pathlib.Path,
        required=False,
        default=pathlib.Path(__file__).parent.absolute() / "data_ofdm.h.tpl",
        help='Path to mako template'
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action='store_true',
        help='Set verbose'
    )
    parser.add_argument(
        "-rx",
        "--receivers",
        type=int,
        required=False,
        default=64,
        help='Number of receivers (N_RX).'
    )
    parser.add_argument(
        "-bs",
        "--beams",
        type=int,
        required=False,
        default=32,
        help='Number of beams (N_BEAMS).'
    )
    parser.add_argument(
        "-sc",
        "--subcarriers",
        type=int,
        required=False,
        default=4096,
        help='Number of subcarriers (N_SC), used as the FFT length.'
    )

    args = parser.parse_args()
    N_rx = args.receivers
    N_bs = args.beams
    N_sc = args.subcarriers

    # All arrays are flat fp16 buffers with two halves per complex entry.
    pFFT_src = np.random.rand(2 * N_rx * N_sc).astype(np.float16)
    # 3 * N_sc / 4 complex twiddles -> twice as many halves, matching the
    # factor 2 used by the other arrays. Integer division keeps the length an
    # int without a float round-trip.
    pTw_coef = np.random.rand(2 * (3 * N_sc // 4)).astype(np.float16)
    pBF_coef = np.random.rand(2 * N_rx * N_bs).astype(np.float16)
    pBF_dst = np.random.rand(2 * N_bs * N_sc).astype(np.float16)

    # Flatten the [a, b] swap pairs into one index list for the C table.
    Bitreversal = np.ndarray.flatten(np.array(compute_bitreversal(N_sc, 2)))

    kwargs = {'name': 'ofdm',
              'pFFT_src': pFFT_src,
              'pTw_coef': pTw_coef,
              'pBF_coef': pBF_coef,
              'pBF_dst': pBF_dst,
              'bitrev': Bitreversal,
              'N_rx': N_rx,
              'N_bs': N_bs,
              'N_sc': N_sc,
              'Log2Len': int(np.log2(N_sc)),
              'BitrevLen': len(Bitreversal)}
    gen_data_header_file(args.outdir, args.tpl, **kwargs)
|
||
if __name__ == "__main__": | ||
main() |
Oops, something went wrong.