Itarative radix-2 FFT algorithm implementation

* using but-reversal permutation. * FFT block contains 3 template parameters: data type and algorithm type. * Add test of different FFT algorithms.
fair-acc · Sep 28, 2023 · 2d1fad7 · 2d1fad7
1 parent b2d5c75
commit 2d1fad7
Show file tree

Hide file tree

Showing 9 changed files with 734 additions and 429 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -85,11 +85,12 @@ target_include_directories(pmtv INTERFACE ${pmt_SOURCE_DIR}/include/)
 
 # FFTW3 is build 2 times for float and double precisions
 SET(FFTW_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/fftw)
-SET(FFTW_CFLAGS "${CFLAGS} -w")
 if (EMSCRIPTEN)
+    SET(FFTW_CFLAGS "${CFLAGS} -w")
     SET(FFTW_CONFIG cd ${FFTW_PREFIX}/src/ && emconfigure ./configure --enable-silent-rules --quiet --disable-fortran --prefix=${FFTW_PREFIX}/install)
     SET(FFTW_BUILD emmake make -j CFLAGS=${FFTW_CFLAGS} --silent V=0 && emmake make install --silent V=0 && emmake make clean --silent V=0)
 else ()
+    SET(FFTW_CFLAGS "${CFLAGS} -w -O3 -march=native -mtune=native")
     SET(FFTW_CONFIG ${FFTW_PREFIX}/src/configure --enable-silent-rules --quiet --disable-fortran --prefix=${FFTW_PREFIX}/install)
     SET(FFTW_BUILD make -j CFLAGS=${FFTW_CFLAGS} --silent V=0 && make install --silent V=0 && make clean --silent V=0)
 endif ()

diff --git a/bench/bm_fft.cpp b/bench/bm_fft.cpp
@@ -1,53 +1,19 @@
-#include "benchmark.hpp"
-
+#include "../test/blocklib/algorithm/fft/fft.hpp"
+#include "../test/blocklib/algorithm/fft/fftw.hpp"
 #include "../test/blocklib/core/fft/fft.hpp"
-
+#include "benchmark.hpp"
 #include <fmt/format.h>
 #include <numbers>
 
-/// This custom implementation of FFT ("compute_fft_v1" and "computeFftV1") is done only for performance comparison with default FFTW implementation.
-
-/**
- * Real-valued fast fourier transform algorithms
- * H.V. Sorensen, D.L. Jones, M.T. Heideman, C.S. Burrus (1987),
- * in: IEEE Trans on Acoustics, Speech, & Signal Processing, 35
- */
-
-template<typename T>
-    requires gr::blocks::fft::ComplexType<T>
-void
-computeFftV1(std::vector<T> &signal) {
-    const std::size_t N{ signal.size() };
-    for (std::size_t j = 0, rev = 0; j < N; j++) {
-        if (j < rev) std::swap(signal[j], signal[rev]);
-        auto maskLen = static_cast<std::size_t>(std::countr_zero(j + 1) + 1);
-        rev ^= N - (N >> maskLen);
-    }
-
-    for (std::size_t s = 2; s <= N; s *= 2) {
-        const std::size_t m{ s / 2 };
-        const T           w{ exp(T(0., static_cast<typename T::value_type>(-2. * std::numbers::pi) / static_cast<typename T::value_type>(s))) };
-        for (std::size_t k = 0; k < N; k += s) {
-            T wk{ 1., 0. };
-            for (std::size_t j = 0; j < m; j++) {
-                const T t{ wk * signal[k + j + m] };
-                const T u{ signal[k + j] };
-                signal[k + j]     = u + t;
-                signal[k + j + m] = u - t;
-                wk *= w;
-            }
-        }
-    }
-}
-
+/// This custom implementation of FFT is done only for performance comparison with default FFTW implementation.
 /**
  * Fast Fourier-Transform according to Cooley–Tukey
  * reference: https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Pseudocode
  */
 template<typename T>
-    requires gr::blocks::fft::ComplexType<T>
+    requires gr::algorithm::ComplexType<T>
 void
-computeFftV2(std::vector<T> &signal) {
+computeFFFTCooleyTukey(std::vector<T> &signal) {
     const std::size_t N{ signal.size() };
 
     if (N == 1) return;
@@ -59,35 +25,23 @@ computeFftV2(std::vector<T> &signal) {
         odd[i]  = signal[2 * i + 1];
     }
 
-    computeFftV2(even);
-    computeFftV2(odd);
+    computeFFFTCooleyTukey(even);
+    computeFFFTCooleyTukey(odd);
 
-    const typename T::value_type wn{ static_cast<typename T::value_type>(2. * std::numbers::pi) / static_cast<typename T::value_type>(N) };
+    const typename T::value_type wn{ static_cast<typename T::value_type>(2. * std::numbers::pi_v<double>) / static_cast<typename T::value_type>(N) };
     for (std::size_t i = 0; i < N / 2; i++) {
         const T wkn(std::cos(wn * static_cast<typename T::value_type>(i)), std::sin(wn * static_cast<typename T::value_type>(i)));
         signal[i]         = even[i] + wkn * odd[i];
         signal[i + N / 2] = even[i] - wkn * odd[i];
     }
 }
 
-template<typename T>
-    requires gr::blocks::fft::ComplexType<T>
-std::vector<typename T::value_type>
-computeMagnitudeSpectrum(std::vector<T> &fftSignal) {
-    const std::size_t                   N{ fftSignal.size() };
-    std::vector<typename T::value_type> magnitudeSpectrum(N / 2);
-    for (std::size_t i = 0; i < N / 2; i++) {
-        magnitudeSpectrum[i] = std::hypot(fftSignal[i].real(), fftSignal[i].imag()) * static_cast<typename T::value_type>(2.) / static_cast<typename T::value_type>(N);
-    }
-    return magnitudeSpectrum;
-}
-
 template<typename T>
 std::vector<T>
 generateSinSample(std::size_t N, double sampleRate, double frequency, double amplitude) {
     std::vector<T> signal(N);
     for (std::size_t i = 0; i < N; i++) {
-        if constexpr (gr::blocks::fft::ComplexType<T>) {
+        if constexpr (gr::algorithm::ComplexType<T>) {
             signal[i] = { static_cast<typename T::value_type>(amplitude * std::sin(2. * std::numbers::pi * frequency * static_cast<double>(i) / sampleRate)), 0. };
         } else {
             signal[i] = static_cast<T>(amplitude * std::sin(2. * std::numbers::pi * frequency * static_cast<double>(i) / sampleRate));
@@ -98,7 +52,7 @@ generateSinSample(std::size_t N, double sampleRate, double frequency, double amp
 
 template<typename T>
 void
-testFft(bool withMagSpectrum) {
+testFFT() {
     using namespace benchmark;
     using namespace boost::ut::reflection;
 
@@ -110,50 +64,67 @@ testFft(bool withMagSpectrum) {
 
     static_assert(std::has_single_bit(N));
 
-    std::vector<T> signal  = generateSinSample<T>(N, sampleRate, frequency, amplitude);
-    std::string    nameOpt = withMagSpectrum ? "fft+mag" : "fft";
+    std::vector<T> signal = generateSinSample<T>(N, sampleRate, frequency, amplitude);
 
     {
-        gr::blocks::fft::fft<T> fft1{};
+        gr::blocks::fft::FFT<T, gr::algorithm::FFTw<T>> fft1{};
         std::ignore = fft1.settings().set({ { "fftSize", N } });
         std::ignore = fft1.settings().apply_staged_parameters();
         fft1.inputHistory.push_back_bulk(signal.begin(), signal.end());
 
-        ::benchmark::benchmark<nRepetitions>(fmt::format("{} - {} fftw", nameOpt, type_name<T>())) = [&fft1, &withMagSpectrum] {
+        ::benchmark::benchmark<nRepetitions>(fmt::format("{} - fftw", type_name<T>())) = [&fft1] {
+            fft1.prepareInput();
+            fft1.computeFFT();
+        };
+    }
+    {
+        gr::blocks::fft::FFT<T, gr::algorithm::FFT<T>> fft1;
+        std::ignore = fft1.settings().set({ { "fftSize", N } });
+        std::ignore = fft1.settings().apply_staged_parameters();
+        fft1.inputHistory.push_back_bulk(signal.begin(), signal.end());
+        ::benchmark::benchmark<nRepetitions>(fmt::format("{} - fft", type_name<T>())) = [&fft1] {
             fft1.prepareInput();
-            fft1.computeFft();
-            if (withMagSpectrum) fft1.computeMagnitudeSpectrum();
+            fft1.computeFFT();
         };
     }
 
     if constexpr (gr::blocks::fft::ComplexType<T>) {
-        ::benchmark::benchmark<nRepetitions>(fmt::format("{} - {} fft_v1", nameOpt, type_name<T>())) = [&signal, &withMagSpectrum] {
+        ::benchmark::benchmark<nRepetitions>(fmt::format("{} - fftCT", type_name<T>())) = [&signal] {
             auto signalCopy = signal;
-            computeFftV1<T>(signalCopy);
-            if (withMagSpectrum) [[maybe_unused]]
-                auto magnitudeSpectrum = computeMagnitudeSpectrum<T>(signalCopy);
+            computeFFFTCooleyTukey<T>(signalCopy);
         };
+    }
 
-        ::benchmark::benchmark<nRepetitions>(fmt::format("{} - {} fft_v2", nameOpt, type_name<T>())) = [&signal, &withMagSpectrum] {
-            auto signalCopy = signal;
-            computeFftV2<T>(signalCopy);
-            if (withMagSpectrum) [[maybe_unused]]
-                auto magnitudeSpectrum = computeMagnitudeSpectrum<T>(signalCopy);
+    {
+        gr::blocks::fft::FFT<T, gr::algorithm::FFTw<T>> fft1{};
+        std::ignore = fft1.settings().set({ { "fftSize", N } });
+        std::ignore = fft1.settings().apply_staged_parameters();
+        fft1.inputHistory.push_back_bulk(signal.begin(), signal.end());
+        fft1.prepareInput();
+        fft1.computeFFT();
+
+        ::benchmark::benchmark<nRepetitions>(fmt::format("{} - magnitude", type_name<T>())) = [&fft1] { fft1.computeMagnitudeSpectrum(); };
+        ::benchmark::benchmark<nRepetitions>(fmt::format("{} - phase", type_name<T>()))     = [&fft1] { fft1.computePhaseSpectrum(); };
+
+        double sum{ 0. };
+        fft1.computeMagnitudeSpectrum();
+        fft1.computePhaseSpectrum();
+        ::benchmark::benchmark<nRepetitions>(fmt::format("{} - dataset", type_name<T>())) = [&fft1, &sum] {
+            const auto ds1 = fft1.createDataset();
+            sum += static_cast<double>(ds1.signal_values[0]);
         };
     }
+
+    ::benchmark::results::add_separator();
 }
 
 inline const boost::ut::suite _fft_bm_tests = [] {
     std::tuple<std::complex<float>, std::complex<double>> complexTypesToTest{};
     std::tuple<float, double>                             realTypesToTest{};
 
-    std::apply([]<class... TArgs>(TArgs... /*args*/) { (testFft<TArgs>(false), ...); }, complexTypesToTest);
-    benchmark::results::add_separator();
-    std::apply([]<class... TArgs>(TArgs... /*args*/) { (testFft<TArgs>(true), ...); }, complexTypesToTest);
-    benchmark::results::add_separator();
-    std::apply([]<class... TArgs>(TArgs... /*args*/) { (testFft<TArgs>(false), ...); }, realTypesToTest);
-    benchmark::results::add_separator();
-    std::apply([]<class... TArgs>(TArgs... /*args*/) { (testFft<TArgs>(true), ...); }, realTypesToTest);
+    std::apply([]<class... TArgs>(TArgs... /*args*/) { (testFFT<TArgs>(), ...); }, complexTypesToTest);
+    // benchmark::results::add_separator();
+    std::apply([]<class... TArgs>(TArgs... /*args*/) { (testFFT<TArgs>(), ...); }, realTypesToTest);
 };
 
 int

diff --git a/test/blocklib/algorithm/fft/fft.hpp b/test/blocklib/algorithm/fft/fft.hpp
@@ -0,0 +1,112 @@
+#ifndef GRAPH_PROTOTYPE_ALGORITHM_FFT_HPP
+#define GRAPH_PROTOTYPE_ALGORITHM_FFT_HPP
+
+#include "fft_types.hpp"
+#include "node.hpp"
+#include "window.hpp"
+#include <ranges>
+
+namespace gr::algorithm {
+
+template<typename T>
+    requires(ComplexType<T> || std::floating_point<T>)
+struct FFT {
+    using PrecisionType = typename FFTInternal<T>::PrecisionType;
+    using OutDataType   = typename FFTInternal<T>::OutDataType;
+
+    std::vector<OutDataType> twiddleFactors{};
+    std::size_t              fftSize{ 0 };
+
+    FFT()                   = default;
+    FFT(const FFT &rhs)     = delete;
+    FFT(FFT &&rhs) noexcept = delete;
+    FFT &
+    operator=(const FFT &rhs)
+            = delete;
+    FFT &
+    operator=(FFT &&rhs) noexcept
+            = delete;
+
+    ~FFT() = default;
+
+    void
+    initAll() {
+        precomputeTwiddleFactors();
+    }
+
+    std::vector<OutDataType>
+    computeFFT(const std::vector<T> &in) noexcept {
+        std::vector<OutDataType> out(in.size());
+        computeFFT(in, out);
+        return out;
+    }
+
+    void
+    computeFFT(const std::vector<T> &in, std::vector<OutDataType> &out) noexcept {
+        if (!std::has_single_bit(in.size())) {
+            throw std::runtime_error(fmt::format("Input data must have 2^N samples, input size: ", in.size()));
+        }
+        if (fftSize != in.size()) {
+            fftSize = in.size();
+            initAll();
+        }
+
+        // For the moment no optimization for real data inputs, just create complex with zero imaginary value.
+        if constexpr (!ComplexType<T>) {
+            std::transform(in.begin(), in.end(), out.begin(), [](const auto c) { return OutDataType(c, 0.); });
+        } else {
+            std::copy(in.begin(), in.end(), out.begin());
+        }
+
+        /**
+         * Real-valued fast fourier transform algorithms
+         * H.V. Sorensen, D.L. Jones, M.T. Heideman, C.S. Burrus (1987),
+         * in: IEEE Trans on Acoustics, Speech, & Signal Processing, 35
+         */
+        bitReversalPermutation(out);
+
+        std::size_t omega_kCounter = 0;
+        for (std::size_t s = 2; s <= fftSize; s *= 2) {
+            const auto half_s = s / 2;
+            for (std::size_t k = 0; k < fftSize; k += s) {
+                for (std::size_t j = 0; j < half_s; j++) {
+                    const auto t{ twiddleFactors[omega_kCounter++] * out[k + j + half_s] };
+                    const auto u{ out[k + j] };
+                    out[k + j]          = u + t;
+                    out[k + j + half_s] = u - t;
+                }
+            }
+        }
+    }
+
+private:
+    void
+    bitReversalPermutation(std::vector<OutDataType> &vec) const noexcept {
+        for (std::size_t j = 0, rev = 0; j < fftSize; j++) {
+            if (j < rev) std::swap(vec[j], vec[rev]);
+            auto maskLen = static_cast<std::size_t>(std::countr_zero(j + 1) + 1);
+            rev ^= fftSize - (fftSize >> maskLen);
+        }
+    }
+
+    void
+    precomputeTwiddleFactors() {
+        twiddleFactors.clear();
+        const auto minus2Pi = PrecisionType(-2. * std::numbers::pi);
+        for (std::size_t s = 2; s <= fftSize; s *= 2) {
+            const std::size_t m{ s / 2 };
+            const OutDataType w{ std::exp(OutDataType(0., minus2Pi / static_cast<PrecisionType>(s))) };
+            for (std::size_t k = 0; k < fftSize; k += s) {
+                OutDataType wk{ 1., 0. };
+                for (std::size_t j = 0; j < m; j++) {
+                    twiddleFactors.push_back(wk);
+                    wk *= w;
+                }
+            }
+        }
+    }
+};
+
+} // namespace gr::algorithm
+
+#endif // GRAPH_PROTOTYPE_ALGORITHM_FFT_HPP
diff --git a/test/blocklib/algorithm/fft/fft_common.hpp b/test/blocklib/algorithm/fft/fft_common.hpp
@@ -0,0 +1,34 @@
+#ifndef GRAPH_PROTOTYPE_ALGORITHM_FFT_COMMON_HPP
+#define GRAPH_PROTOTYPE_ALGORITHM_FFT_COMMON_HPP
+
+#include "fft_types.hpp"
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+namespace gr::algorithm {
+
+template<ComplexType T, typename U>
+void
+computeMagnitudeSpectrum(const std::vector<T> &fftOut, std::vector<U> &magnitudeSpectrum, std::size_t fftSize, bool outputInDb) {
+    if (fftOut.size() < magnitudeSpectrum.size()) {
+        throw std::invalid_argument(fmt::format("FFT vector size ({}) must be more or equal than magnitude vector size ({}).", fftOut.size(), magnitudeSpectrum.size()));
+    }
+    using PrecisionType = typename T::value_type;
+    std::transform(fftOut.begin(), std::next(fftOut.begin(), magnitudeSpectrum.size()), magnitudeSpectrum.begin(), [fftSize, outputInDb](const auto &c) {
+        const auto mag{ std::hypot(c.real(), c.imag()) * PrecisionType(2.0) / static_cast<PrecisionType>(fftSize) };
+        return static_cast<U>(outputInDb ? PrecisionType(20.) * std::log10(std::abs(mag)) : mag);
+    });
+}
+
+template<ComplexType T, typename U>
+void
+computePhaseSpectrum(const std::vector<T> &fftOut, std::vector<U> &phaseSpectrum) {
+    if (fftOut.size() < phaseSpectrum.size()) {
+        throw std::invalid_argument(fmt::format("FFT vector size ({}) must be more or equal than phase vector size ({}).", fftOut.size(), phaseSpectrum.size()));
+    }
+    std::transform(fftOut.begin(), std::next(fftOut.begin(), phaseSpectrum.size()), phaseSpectrum.begin(), [](const auto &c) { return static_cast<U>(std::atan2(c.imag(), c.real())); });
+}
+
+} // namespace gr::algorithm
+#endif // GRAPH_PROTOTYPE_ALGORITHM_FFT_COMMON_HPP
diff --git a/test/blocklib/algorithm/fft/fft_types.hpp b/test/blocklib/algorithm/fft/fft_types.hpp
@@ -0,0 +1,37 @@
+#ifndef GRAPH_PROTOTYPE_ALGORITHM_FFT_TYPES_HPP
+#define GRAPH_PROTOTYPE_ALGORITHM_FFT_TYPES_HPP
+
+#include "node.hpp"
+
+namespace gr::algorithm {
+template<typename T>
+concept ComplexType = std::is_same_v<T, std::complex<float>> || std::is_same_v<T, std::complex<double>>;
+
+template<typename T>
+concept DoubleType = std::is_same_v<T, std::complex<double>> || std::is_same_v<T, double>;
+
+// clang-format off
+template <typename signedT> struct MakeSigned { using type = signedT;};
+template <std::integral signedT> struct MakeSigned<signedT> { using type = std::make_signed_t<signedT>; };
+template<typename evalU> struct EvalOutputType { using type1 = evalU; using type2 = typename MakeSigned<evalU>::type;};
+template<ComplexType evalU> struct EvalOutputType<evalU> { using type1 = typename evalU::value_type; using type2 = evalU;};
+
+template<typename T>
+struct FFTInternal {
+    using PrecisionType = float;
+    using InDataType = std::conditional_t<ComplexType<T>, std::complex<float>, float>;
+    using OutDataType = std::complex<float>;
+};
+
+template<DoubleType T>
+struct FFTInternal<T> {
+    using PrecisionType = double;
+    using InDataType = std::conditional_t<ComplexType<T>, std::complex<double>, double>;
+    using OutDataType = std::complex<double>;
+};
+
+template<typename T>
+using FFTOutputType = std::conditional_t<ComplexType<T>, typename EvalOutputType<T>::type1, typename EvalOutputType<T>::type2>;
+
+} // namespace gr::algorithm
+#endif // GRAPH_PROTOTYPE_ALGORITHM_FFT_TYPES_HPP