Skip to content

Commit

Permalink
Itarative radix-2 FFT algorithm implementation
Browse files Browse the repository at this point in the history
* using but-reversal permutation.
* FFT block contains 3 template parameters: data type and algorithm type.
* Add test of different FFT algorithms.
  • Loading branch information
slebedev committed Sep 28, 2023
1 parent b2d5c75 commit 2d1fad7
Show file tree
Hide file tree
Showing 9 changed files with 734 additions and 429 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,12 @@ target_include_directories(pmtv INTERFACE ${pmt_SOURCE_DIR}/include/)

# FFTW3 is build 2 times for float and double precisions
SET(FFTW_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/fftw)
SET(FFTW_CFLAGS "${CFLAGS} -w")
if (EMSCRIPTEN)
SET(FFTW_CFLAGS "${CFLAGS} -w")
SET(FFTW_CONFIG cd ${FFTW_PREFIX}/src/ && emconfigure ./configure --enable-silent-rules --quiet --disable-fortran --prefix=${FFTW_PREFIX}/install)
SET(FFTW_BUILD emmake make -j CFLAGS=${FFTW_CFLAGS} --silent V=0 && emmake make install --silent V=0 && emmake make clean --silent V=0)
else ()
SET(FFTW_CFLAGS "${CFLAGS} -w -O3 -march=native -mtune=native")
SET(FFTW_CONFIG ${FFTW_PREFIX}/src/configure --enable-silent-rules --quiet --disable-fortran --prefix=${FFTW_PREFIX}/install)
SET(FFTW_BUILD make -j CFLAGS=${FFTW_CFLAGS} --silent V=0 && make install --silent V=0 && make clean --silent V=0)
endif ()
Expand Down
129 changes: 50 additions & 79 deletions bench/bm_fft.cpp
Original file line number Diff line number Diff line change
@@ -1,53 +1,19 @@
#include "benchmark.hpp"

#include "../test/blocklib/algorithm/fft/fft.hpp"
#include "../test/blocklib/algorithm/fft/fftw.hpp"
#include "../test/blocklib/core/fft/fft.hpp"

#include "benchmark.hpp"
#include <fmt/format.h>
#include <numbers>

/// This custom implementation of FFT ("compute_fft_v1" and "computeFftV1") is done only for performance comparison with default FFTW implementation.

/**
* Real-valued fast fourier transform algorithms
* H.V. Sorensen, D.L. Jones, M.T. Heideman, C.S. Burrus (1987),
* in: IEEE Trans on Acoustics, Speech, & Signal Processing, 35
*/

template<typename T>
requires gr::blocks::fft::ComplexType<T>
void
computeFftV1(std::vector<T> &signal) {
const std::size_t N{ signal.size() };
for (std::size_t j = 0, rev = 0; j < N; j++) {
if (j < rev) std::swap(signal[j], signal[rev]);
auto maskLen = static_cast<std::size_t>(std::countr_zero(j + 1) + 1);
rev ^= N - (N >> maskLen);
}

for (std::size_t s = 2; s <= N; s *= 2) {
const std::size_t m{ s / 2 };
const T w{ exp(T(0., static_cast<typename T::value_type>(-2. * std::numbers::pi) / static_cast<typename T::value_type>(s))) };
for (std::size_t k = 0; k < N; k += s) {
T wk{ 1., 0. };
for (std::size_t j = 0; j < m; j++) {
const T t{ wk * signal[k + j + m] };
const T u{ signal[k + j] };
signal[k + j] = u + t;
signal[k + j + m] = u - t;
wk *= w;
}
}
}
}

/// This custom implementation of FFT is done only for performance comparison with default FFTW implementation.
/**
* Fast Fourier-Transform according to Cooley–Tukey
* reference: https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Pseudocode
*/
template<typename T>
requires gr::blocks::fft::ComplexType<T>
requires gr::algorithm::ComplexType<T>
void
computeFftV2(std::vector<T> &signal) {
computeFFFTCooleyTukey(std::vector<T> &signal) {
const std::size_t N{ signal.size() };

if (N == 1) return;
Expand All @@ -59,35 +25,23 @@ computeFftV2(std::vector<T> &signal) {
odd[i] = signal[2 * i + 1];
}

computeFftV2(even);
computeFftV2(odd);
computeFFFTCooleyTukey(even);
computeFFFTCooleyTukey(odd);

const typename T::value_type wn{ static_cast<typename T::value_type>(2. * std::numbers::pi) / static_cast<typename T::value_type>(N) };
const typename T::value_type wn{ static_cast<typename T::value_type>(2. * std::numbers::pi_v<double>) / static_cast<typename T::value_type>(N) };
for (std::size_t i = 0; i < N / 2; i++) {
const T wkn(std::cos(wn * static_cast<typename T::value_type>(i)), std::sin(wn * static_cast<typename T::value_type>(i)));
signal[i] = even[i] + wkn * odd[i];
signal[i + N / 2] = even[i] - wkn * odd[i];
}
}

template<typename T>
requires gr::blocks::fft::ComplexType<T>
std::vector<typename T::value_type>
computeMagnitudeSpectrum(std::vector<T> &fftSignal) {
const std::size_t N{ fftSignal.size() };
std::vector<typename T::value_type> magnitudeSpectrum(N / 2);
for (std::size_t i = 0; i < N / 2; i++) {
magnitudeSpectrum[i] = std::hypot(fftSignal[i].real(), fftSignal[i].imag()) * static_cast<typename T::value_type>(2.) / static_cast<typename T::value_type>(N);
}
return magnitudeSpectrum;
}

template<typename T>
std::vector<T>
generateSinSample(std::size_t N, double sampleRate, double frequency, double amplitude) {
std::vector<T> signal(N);
for (std::size_t i = 0; i < N; i++) {
if constexpr (gr::blocks::fft::ComplexType<T>) {
if constexpr (gr::algorithm::ComplexType<T>) {
signal[i] = { static_cast<typename T::value_type>(amplitude * std::sin(2. * std::numbers::pi * frequency * static_cast<double>(i) / sampleRate)), 0. };
} else {
signal[i] = static_cast<T>(amplitude * std::sin(2. * std::numbers::pi * frequency * static_cast<double>(i) / sampleRate));
Expand All @@ -98,7 +52,7 @@ generateSinSample(std::size_t N, double sampleRate, double frequency, double amp

template<typename T>
void
testFft(bool withMagSpectrum) {
testFFT() {
using namespace benchmark;
using namespace boost::ut::reflection;

Expand All @@ -110,50 +64,67 @@ testFft(bool withMagSpectrum) {

static_assert(std::has_single_bit(N));

std::vector<T> signal = generateSinSample<T>(N, sampleRate, frequency, amplitude);
std::string nameOpt = withMagSpectrum ? "fft+mag" : "fft";
std::vector<T> signal = generateSinSample<T>(N, sampleRate, frequency, amplitude);

{
gr::blocks::fft::fft<T> fft1{};
gr::blocks::fft::FFT<T, gr::algorithm::FFTw<T>> fft1{};
std::ignore = fft1.settings().set({ { "fftSize", N } });
std::ignore = fft1.settings().apply_staged_parameters();
fft1.inputHistory.push_back_bulk(signal.begin(), signal.end());

::benchmark::benchmark<nRepetitions>(fmt::format("{} - {} fftw", nameOpt, type_name<T>())) = [&fft1, &withMagSpectrum] {
::benchmark::benchmark<nRepetitions>(fmt::format("{} - fftw", type_name<T>())) = [&fft1] {
fft1.prepareInput();
fft1.computeFFT();
};
}
{
gr::blocks::fft::FFT<T, gr::algorithm::FFT<T>> fft1;
std::ignore = fft1.settings().set({ { "fftSize", N } });
std::ignore = fft1.settings().apply_staged_parameters();
fft1.inputHistory.push_back_bulk(signal.begin(), signal.end());
::benchmark::benchmark<nRepetitions>(fmt::format("{} - fft", type_name<T>())) = [&fft1] {
fft1.prepareInput();
fft1.computeFft();
if (withMagSpectrum) fft1.computeMagnitudeSpectrum();
fft1.computeFFT();
};
}

if constexpr (gr::blocks::fft::ComplexType<T>) {
::benchmark::benchmark<nRepetitions>(fmt::format("{} - {} fft_v1", nameOpt, type_name<T>())) = [&signal, &withMagSpectrum] {
::benchmark::benchmark<nRepetitions>(fmt::format("{} - fftCT", type_name<T>())) = [&signal] {
auto signalCopy = signal;
computeFftV1<T>(signalCopy);
if (withMagSpectrum) [[maybe_unused]]
auto magnitudeSpectrum = computeMagnitudeSpectrum<T>(signalCopy);
computeFFFTCooleyTukey<T>(signalCopy);
};
}

::benchmark::benchmark<nRepetitions>(fmt::format("{} - {} fft_v2", nameOpt, type_name<T>())) = [&signal, &withMagSpectrum] {
auto signalCopy = signal;
computeFftV2<T>(signalCopy);
if (withMagSpectrum) [[maybe_unused]]
auto magnitudeSpectrum = computeMagnitudeSpectrum<T>(signalCopy);
{
gr::blocks::fft::FFT<T, gr::algorithm::FFTw<T>> fft1{};
std::ignore = fft1.settings().set({ { "fftSize", N } });
std::ignore = fft1.settings().apply_staged_parameters();
fft1.inputHistory.push_back_bulk(signal.begin(), signal.end());
fft1.prepareInput();
fft1.computeFFT();

::benchmark::benchmark<nRepetitions>(fmt::format("{} - magnitude", type_name<T>())) = [&fft1] { fft1.computeMagnitudeSpectrum(); };
::benchmark::benchmark<nRepetitions>(fmt::format("{} - phase", type_name<T>())) = [&fft1] { fft1.computePhaseSpectrum(); };

double sum{ 0. };
fft1.computeMagnitudeSpectrum();
fft1.computePhaseSpectrum();
::benchmark::benchmark<nRepetitions>(fmt::format("{} - dataset", type_name<T>())) = [&fft1, &sum] {
const auto ds1 = fft1.createDataset();
sum += static_cast<double>(ds1.signal_values[0]);
};
}

::benchmark::results::add_separator();
}

inline const boost::ut::suite _fft_bm_tests = [] {
std::tuple<std::complex<float>, std::complex<double>> complexTypesToTest{};
std::tuple<float, double> realTypesToTest{};

std::apply([]<class... TArgs>(TArgs... /*args*/) { (testFft<TArgs>(false), ...); }, complexTypesToTest);
benchmark::results::add_separator();
std::apply([]<class... TArgs>(TArgs... /*args*/) { (testFft<TArgs>(true), ...); }, complexTypesToTest);
benchmark::results::add_separator();
std::apply([]<class... TArgs>(TArgs... /*args*/) { (testFft<TArgs>(false), ...); }, realTypesToTest);
benchmark::results::add_separator();
std::apply([]<class... TArgs>(TArgs... /*args*/) { (testFft<TArgs>(true), ...); }, realTypesToTest);
std::apply([]<class... TArgs>(TArgs... /*args*/) { (testFFT<TArgs>(), ...); }, complexTypesToTest);
// benchmark::results::add_separator();
std::apply([]<class... TArgs>(TArgs... /*args*/) { (testFFT<TArgs>(), ...); }, realTypesToTest);
};

int
Expand Down
112 changes: 112 additions & 0 deletions test/blocklib/algorithm/fft/fft.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#ifndef GRAPH_PROTOTYPE_ALGORITHM_FFT_HPP
#define GRAPH_PROTOTYPE_ALGORITHM_FFT_HPP

#include "fft_types.hpp"
#include "node.hpp"
#include "window.hpp"
#include <ranges>

namespace gr::algorithm {

template<typename T>
requires(ComplexType<T> || std::floating_point<T>)
struct FFT {
using PrecisionType = typename FFTInternal<T>::PrecisionType;
using OutDataType = typename FFTInternal<T>::OutDataType;

std::vector<OutDataType> twiddleFactors{};
std::size_t fftSize{ 0 };

FFT() = default;
FFT(const FFT &rhs) = delete;
FFT(FFT &&rhs) noexcept = delete;
FFT &
operator=(const FFT &rhs)
= delete;
FFT &
operator=(FFT &&rhs) noexcept
= delete;

~FFT() = default;

void
initAll() {
precomputeTwiddleFactors();
}

std::vector<OutDataType>
computeFFT(const std::vector<T> &in) noexcept {
std::vector<OutDataType> out(in.size());
computeFFT(in, out);
return out;
}

void
computeFFT(const std::vector<T> &in, std::vector<OutDataType> &out) noexcept {
if (!std::has_single_bit(in.size())) {
throw std::runtime_error(fmt::format("Input data must have 2^N samples, input size: ", in.size()));
}
if (fftSize != in.size()) {
fftSize = in.size();
initAll();
}

// For the moment no optimization for real data inputs, just create complex with zero imaginary value.
if constexpr (!ComplexType<T>) {
std::transform(in.begin(), in.end(), out.begin(), [](const auto c) { return OutDataType(c, 0.); });
} else {
std::copy(in.begin(), in.end(), out.begin());
}

/**
* Real-valued fast fourier transform algorithms
* H.V. Sorensen, D.L. Jones, M.T. Heideman, C.S. Burrus (1987),
* in: IEEE Trans on Acoustics, Speech, & Signal Processing, 35
*/
bitReversalPermutation(out);

std::size_t omega_kCounter = 0;
for (std::size_t s = 2; s <= fftSize; s *= 2) {
const auto half_s = s / 2;
for (std::size_t k = 0; k < fftSize; k += s) {
for (std::size_t j = 0; j < half_s; j++) {
const auto t{ twiddleFactors[omega_kCounter++] * out[k + j + half_s] };
const auto u{ out[k + j] };
out[k + j] = u + t;
out[k + j + half_s] = u - t;
}
}
}
}

private:
void
bitReversalPermutation(std::vector<OutDataType> &vec) const noexcept {
for (std::size_t j = 0, rev = 0; j < fftSize; j++) {
if (j < rev) std::swap(vec[j], vec[rev]);
auto maskLen = static_cast<std::size_t>(std::countr_zero(j + 1) + 1);
rev ^= fftSize - (fftSize >> maskLen);
}
}

void
precomputeTwiddleFactors() {
twiddleFactors.clear();
const auto minus2Pi = PrecisionType(-2. * std::numbers::pi);
for (std::size_t s = 2; s <= fftSize; s *= 2) {
const std::size_t m{ s / 2 };
const OutDataType w{ std::exp(OutDataType(0., minus2Pi / static_cast<PrecisionType>(s))) };
for (std::size_t k = 0; k < fftSize; k += s) {
OutDataType wk{ 1., 0. };
for (std::size_t j = 0; j < m; j++) {
twiddleFactors.push_back(wk);
wk *= w;
}
}
}
}
};

} // namespace gr::algorithm

#endif // GRAPH_PROTOTYPE_ALGORITHM_FFT_HPP
34 changes: 34 additions & 0 deletions test/blocklib/algorithm/fft/fft_common.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#ifndef GRAPH_PROTOTYPE_ALGORITHM_FFT_COMMON_HPP
#define GRAPH_PROTOTYPE_ALGORITHM_FFT_COMMON_HPP

#include "fft_types.hpp"
#include <algorithm>
#include <cmath>
#include <vector>

namespace gr::algorithm {

template<ComplexType T, typename U>
void
computeMagnitudeSpectrum(const std::vector<T> &fftOut, std::vector<U> &magnitudeSpectrum, std::size_t fftSize, bool outputInDb) {
if (fftOut.size() < magnitudeSpectrum.size()) {
throw std::invalid_argument(fmt::format("FFT vector size ({}) must be more or equal than magnitude vector size ({}).", fftOut.size(), magnitudeSpectrum.size()));
}
using PrecisionType = typename T::value_type;
std::transform(fftOut.begin(), std::next(fftOut.begin(), magnitudeSpectrum.size()), magnitudeSpectrum.begin(), [fftSize, outputInDb](const auto &c) {
const auto mag{ std::hypot(c.real(), c.imag()) * PrecisionType(2.0) / static_cast<PrecisionType>(fftSize) };
return static_cast<U>(outputInDb ? PrecisionType(20.) * std::log10(std::abs(mag)) : mag);
});
}

template<ComplexType T, typename U>
void
computePhaseSpectrum(const std::vector<T> &fftOut, std::vector<U> &phaseSpectrum) {
if (fftOut.size() < phaseSpectrum.size()) {
throw std::invalid_argument(fmt::format("FFT vector size ({}) must be more or equal than phase vector size ({}).", fftOut.size(), phaseSpectrum.size()));
}
std::transform(fftOut.begin(), std::next(fftOut.begin(), phaseSpectrum.size()), phaseSpectrum.begin(), [](const auto &c) { return static_cast<U>(std::atan2(c.imag(), c.real())); });
}

} // namespace gr::algorithm
#endif // GRAPH_PROTOTYPE_ALGORITHM_FFT_COMMON_HPP
37 changes: 37 additions & 0 deletions test/blocklib/algorithm/fft/fft_types.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#ifndef GRAPH_PROTOTYPE_ALGORITHM_FFT_TYPES_HPP
#define GRAPH_PROTOTYPE_ALGORITHM_FFT_TYPES_HPP

#include "node.hpp"

namespace gr::algorithm {
template<typename T>
concept ComplexType = std::is_same_v<T, std::complex<float>> || std::is_same_v<T, std::complex<double>>;

template<typename T>
concept DoubleType = std::is_same_v<T, std::complex<double>> || std::is_same_v<T, double>;

// clang-format off
template <typename signedT> struct MakeSigned { using type = signedT;};
template <std::integral signedT> struct MakeSigned<signedT> { using type = std::make_signed_t<signedT>; };
template<typename evalU> struct EvalOutputType { using type1 = evalU; using type2 = typename MakeSigned<evalU>::type;};
template<ComplexType evalU> struct EvalOutputType<evalU> { using type1 = typename evalU::value_type; using type2 = evalU;};

template<typename T>
struct FFTInternal {
using PrecisionType = float;
using InDataType = std::conditional_t<ComplexType<T>, std::complex<float>, float>;
using OutDataType = std::complex<float>;
};

template<DoubleType T>
struct FFTInternal<T> {
using PrecisionType = double;
using InDataType = std::conditional_t<ComplexType<T>, std::complex<double>, double>;
using OutDataType = std::complex<double>;
};

template<typename T>
using FFTOutputType = std::conditional_t<ComplexType<T>, typename EvalOutputType<T>::type1, typename EvalOutputType<T>::type2>;

} // namespace gr::algorithm
#endif // GRAPH_PROTOTYPE_ALGORITHM_FFT_TYPES_HPP
Loading

0 comments on commit 2d1fad7

Please sign in to comment.