Skip to content

Commit

Permalink
[software] Add kernel for fixed-point q16 MIMO MMSE estimation
Browse files Browse the repository at this point in the history
  • Loading branch information
mbertuletti committed Jan 4, 2024
1 parent 734cd31 commit 8649ee5
Show file tree
Hide file tree
Showing 23 changed files with 1,266 additions and 252 deletions.
77 changes: 20 additions & 57 deletions software/apps/chest_q16/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,107 +8,70 @@
#include <stdlib.h>
#include <string.h>

#include "dma.h"
#include "encoding.h"
#include "printf.h"
#include "runtime.h"
#include "synchronization.h"
#include "xpulp/builtins_v2.h"

#include "data/data_chest_q16.h"

#include "kernel/chest_q16p.h"
#include "kernel/chest_q16s.h"
#include "kernel/mempool_checks.h"
#include "kernel/mempool_chest_q16p.h"
#include "kernel/mempool_chest_q16s.h"

//#define SINGLE
#define PARALLEL

int16_t PilotTX_l1[2 * N_TX * N_SAMPLES]
int16_t l1_PilotTX[2 * N_TX * N_SAMPLES]
__attribute__((aligned(N_TX * N_SAMPLES), section(".l1")));
int16_t PilotRX_l1[2 * N_RX * N_SAMPLES]
int16_t l1_PilotRX[2 * N_RX * N_SAMPLES]
__attribute__((aligned(N_TX * N_SAMPLES), section(".l1")));
int16_t HEST_l1[2 * N_RX * N_TX * N_SAMPLES]
int16_t l1_HEST[2 * N_RX * N_TX * N_SAMPLES]
__attribute__((aligned(N_TX * N_SAMPLES), section(".l1")));

/**
 * Copy N_el int16_t values from pSrc_l2 to pDst_l1, sharing the work
 * between cores.
 *
 * Each core starts at its own core id and advances by the core count, so
 * the elements are distributed round-robin. All cores call
 * mempool_barrier() once their share of the copy is done.
 *
 * @param pSrc_l2 Source buffer (read only by this function).
 * @param pDst_l1 Destination buffer, at least N_el elements long.
 * @param N_el    Number of int16_t elements to copy.
 */
void initialize_vector(int16_t *pSrc_l2, int16_t *pDst_l1, uint32_t N_el) {
  uint32_t idx = mempool_get_core_id();
  uint32_t const stride = mempool_get_core_count();

  // Interleaved partitioning: this core owns idx, idx + stride, ...
  while (idx < N_el) {
    pDst_l1[idx] = (int16_t)pSrc_l2[idx];
    idx += stride;
  }

  mempool_barrier(stride);
}
int main() {

/**
 * Set N_el int16_t values of pSrc_l1 to zero, sharing the work between
 * cores.
 *
 * Elements are assigned round-robin: a core writes the indices
 * core_id, core_id + num_cores, ... below N_el, then calls
 * mempool_barrier().
 *
 * @param pSrc_l1 Buffer to clear, at least N_el elements long.
 * @param N_el    Number of int16_t elements to clear.
 */
void zeros(int16_t *pSrc_l1, uint32_t N_el) {
  uint32_t idx = mempool_get_core_id();
  uint32_t const stride = mempool_get_core_count();

  // This core clears every stride-th element starting at its own id.
  while (idx < N_el) {
    pSrc_l1[idx] = (int16_t)0;
    idx += stride;
  }

  mempool_barrier(stride);
}
mempool_barrier_init(core_id);

void check_result(int16_t *pRes, int16_t *pExp, uint32_t N_el) {
uint32_t core_id = mempool_get_core_id();
uint32_t num_cores = mempool_get_core_count();
/* Initialize matrices */
if (core_id == 0) {
for (uint32_t i = 0; i < N_el; i++) {
if (pExp[i] != pRes[i]) {
printf("ERROR: Exp[%6d]=%6d Res[%6d]=%6d\n", i, pExp[i], i, pRes[i]);
}
}
dma_memcpy_blocking(l1_PilotRX, l2_PilotRX,
(N_RX * N_SAMPLES) * sizeof(int32_t));
dma_memcpy_blocking(l1_PilotTX, l2_PilotTX,
(N_TX * N_SAMPLES) * sizeof(int32_t));
}
// Wait at barrier until everyone is ready
mempool_barrier(num_cores);
return;
}

int main() {

uint32_t core_id = mempool_get_core_id();
uint32_t num_cores = mempool_get_core_count();
mempool_barrier_init(core_id);

initialize_vector(PilotTX, PilotTX_l1, 2 * (N_TX * N_SAMPLES));
initialize_vector(PilotRX, PilotRX_l1, 2 * (N_RX * N_SAMPLES));
zeros(HEST_l1, 2 * (N_RX * N_TX * N_SAMPLES));

#ifdef SINGLE
if (core_id == 0) {
mempool_chest_q16s_unrolled4_xpulpv2(HEST_l1, PilotRX_l1, PilotTX_l1, N_RX,
mempool_chest_q16s_unrolled4_xpulpv2(l1_HEST, l1_PilotRX, l1_PilotTX, N_RX,
N_TX, N_SAMPLES);
// mempool_chest_q16s_unrolled4(HEST_l1, PilotRX_l1, PilotTX_l1, N_RX, N_TX,
// N_SAMPLES);
mempool_start_benchmark();
mempool_chest_q16s_unrolled4_xpulpv2(HEST_l1, PilotRX_l1, PilotTX_l1, N_RX,
mempool_chest_q16s_unrolled4_xpulpv2(l1_HEST, l1_PilotRX, l1_PilotTX, N_RX,
N_TX, N_SAMPLES);
// mempool_chest_q16s_unrolled4(HEST_l1, PilotRX_l1, PilotTX_l1, N_RX, N_TX,
// N_SAMPLES);
mempool_stop_benchmark();
}
mempool_barrier(num_cores);
#endif

#ifdef PARALLEL

// mempool_chest_q16p_unrolled4_xpulpv2(HEST_l1, PilotRX_l1, PilotTX_l1,
// N_RX, N_TX, N_SAMPLES, core_id, num_cores); mempool_start_benchmark();
// mempool_chest_q16p_unrolled4_xpulpv2(HEST_l1, PilotRX_l1, PilotTX_l1,
// N_RX, N_TX, N_SAMPLES, core_id, num_cores); mempool_stop_benchmark();

if (core_id < N_SAMPLES) {
mempool_chest_q16p_unrolled4_xpulpv2_local(HEST_l1, PilotRX_l1, PilotTX_l1,
mempool_chest_q16p_unrolled4_xpulpv2_local(l1_HEST, l1_PilotRX, l1_PilotTX,
N_RX, N_TX, N_SAMPLES, core_id);
mempool_start_benchmark();
mempool_chest_q16p_unrolled4_xpulpv2_local(HEST_l1, PilotRX_l1, PilotTX_l1,
mempool_chest_q16p_unrolled4_xpulpv2_local(l1_HEST, l1_PilotRX, l1_PilotTX,
N_RX, N_TX, N_SAMPLES, core_id);
mempool_stop_benchmark();
}

mempool_barrier(num_cores);
#endif

check_result(HEST_l1, HEST, 2 * N_RX * N_TX * N_SAMPLES);

mempool_check_q16(l1_HEST, l2_HEST, 2 * N_RX * N_TX * N_SAMPLES, 100, 0);
return 0;
}
64 changes: 64 additions & 0 deletions software/apps/cholesky_q16/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright 2021 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// Author: Marco Bertuletti, ETH Zurich

#include "dma.h"
#include "encoding.h"
#include "printf.h"
#include "runtime.h"
#include "synchronization.h"
#include "xpulp/builtins_v2.h"

#include "data/data_cholesky_q16.h"
#include "kernel/mempool_checks.h"
#include "kernel/mempool_cholesky_q16s.h"

#define SINGLE

// Kernel input buffer, placed in the ".l1_prio" linker section.
// Holds 2 * dim_N * dim_N int16_t values for each of the N_SAMPLES matrices.
// NOTE(review): the factor 2 presumably covers interleaved real/imaginary
// parts (main() casts the buffer to v2s *) - confirm against the data header.
int16_t l1_GIn[2 * dim_N * dim_N * N_SAMPLES]
__attribute__((section(".l1_prio")));
// Kernel output buffer with the same size and section as l1_GIn.
int16_t l1_LOut[2 * dim_N * dim_N * N_SAMPLES]
__attribute__((section(".l1_prio")));

/**
 * Benchmark driver for the q16 Cholesky kernels.
 *
 * Core 0 copies l2_GIn and l2_LOut into the L1 buffers with blocking DMA
 * transfers, then all cores meet at a barrier. Depending on the compile-time
 * switch, either core 0 alone runs mempool_cholesky_q16vecs() (SINGLE), or
 * the N_SAMPLES matrices are handed out round-robin to the cores and each
 * one is processed by mempool_cholesky_q16s() (PARALLEL). Finally l1_LOut is
 * compared against l2_LOut with mempool_check_q16().
 *
 * @return Always 0.
 */
int main() {
  uint32_t core_id = mempool_get_core_id();
  uint32_t num_cores = mempool_get_core_count();
  mempool_barrier_init(core_id); // Initialize barrier and synchronize

  /* Initialize matrices */
  if (core_id == 0) {
    // Size in bytes: one sizeof(int32_t) per matrix entry, i.e. two int16_t
    // values per entry, which matches the 2 * dim_N * dim_N * N_SAMPLES
    // int16_t elements of the L1 buffers.
    dma_memcpy_blocking(l1_GIn, l2_GIn,
                        dim_N * dim_N * N_SAMPLES * sizeof(int32_t));
    // NOTE(review): l1_LOut is preloaded from l2_LOut, which is also the
    // reference passed to the final check - confirm that the kernel
    // overwrites every element that is compared.
    dma_memcpy_blocking(l1_LOut, l2_LOut,
                        dim_N * dim_N * N_SAMPLES * sizeof(int32_t));
  }
  // Wait at barrier until everyone is ready
  mempool_barrier(num_cores);

#ifdef SINGLE
  /* Benchmark */
  if (core_id == 0) {
    mempool_start_benchmark();
    mempool_cholesky_q16vecs((v2s *)l1_GIn, (v2s *)l1_LOut, dim_N);
    mempool_stop_benchmark();
  }
  mempool_barrier(num_cores);
#endif

#ifdef PARALLEL
  // Matrix i is handled by core (i % num_cores).
  for (uint32_t i = core_id; i < N_SAMPLES; i += num_cores) {
    // NOTE(review): the benchmark is (re)started on every iteration but
    // stopped only once after the barrier - confirm this is intended.
    mempool_start_benchmark();
    // The buffers are int16_t arrays, so the per-matrix pointers must be
    // int16_t * as well (each matrix spans 2 * dim_N * dim_N elements).
    int16_t *ptr_in_matrix = l1_GIn + i * 2 * dim_N * dim_N;
    int16_t *ptr_out_matrix = l1_LOut + i * 2 * dim_N * dim_N;
    mempool_cholesky_q16s((v2s *)ptr_in_matrix, (v2s *)ptr_out_matrix, dim_N);
  }
  mempool_barrier(num_cores);
  mempool_stop_benchmark();
#endif

  // Only the first 2 * dim_N * dim_N values (one matrix) are compared.
  // NOTE(review): the tolerance argument is the float literal 0.01f; if the
  // corresponding parameter of mempool_check_q16() is an integer type it is
  // converted to 0 - confirm against kernel/mempool_checks.h.
  mempool_check_q16(l1_LOut, l2_LOut, 2 * dim_N * dim_N, 0.01f, 0);
  mempool_barrier(num_cores);
  return 0;
}
84 changes: 0 additions & 84 deletions software/apps/cholesky_q32/initialization.h

This file was deleted.

Loading

0 comments on commit 8649ee5

Please sign in to comment.