Skip to content

Commit

Permalink
Make commits from Paul compile
Browse files Browse the repository at this point in the history
  • Loading branch information
crsib committed Oct 2, 2023
1 parent f2d97ca commit dc72a7e
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -283,9 +283,14 @@ inline std::pair<float, float> sincos_ss(float angle)
return std::make_pair(_mm_cvtss_f32(res.first), _mm_cvtss_f32(res.second));
}

inline __m128 norm(__m128 x, __m128 y)
{
return _mm_add_ps(_mm_mul_ps(x, x), _mm_mul_ps(y, y));
}

inline __m128 magnitude(__m128 x, __m128 y)
{
return _mm_sqrt_ps(_mm_add_ps(_mm_mul_ps(x, x), _mm_mul_ps(y, y)));
return _mm_sqrt_ps(norm(x, y));
}

inline float sqrt_ss(float x)
Expand Down Expand Up @@ -373,4 +378,51 @@ inline void convert_to_complex_parallel_simd_aligned(
_mm_set_ss(magnitudes[i]), _mm_set_ss(phases[i]))[0];
}


inline void rotate_parallel_simd_aligned(
const float* oldPhase, const float* newPhase, std::complex<float>* output,
int n)
{
constexpr int N = 4;
constexpr int byte_size = sizeof(float);

for (int i = 0; i <= n - N; i += N)
{
auto [sin, cos] = sincos_ps(
_mm_sub_ps(
_mm_load_ps(newPhase + i),
_mm_load_ps(oldPhase + i)));


alignas(
16) float temp_real[N] = { real(output[i + 0]), real(output[i + 1]),
real(output[i + 2]), real(output[i + 3]) };

alignas(
16) float temp_imag[N] = { imag(output[i + 0]), imag(output[i + 1]),
imag(output[i + 2]), imag(output[i + 3]) };

auto rp = _mm_load_ps(temp_real);
auto ip = _mm_load_ps(temp_imag);


// We need to calculate (rp, ip) * (cos, sin) -> (rp*cos - ip*sin, rp*sin + ip*cos)

auto out_rp = _mm_sub_ps(_mm_mul_ps(rp, cos), _mm_mul_ps(ip, sin));
auto out_ip = _mm_add_ps(_mm_mul_ps(rp, sin), _mm_mul_ps(ip, cos));

_mm_store_ps(temp_real, out_rp);
_mm_store_ps(temp_imag, out_ip);

for (int j = 0; j < N; ++j)
output[i + j] = { temp_real[j], temp_imag[j] };
}
// deal with last partial packet
for (int i = n & (~(N - 1)); i < n; ++i)
{
auto theta = newPhase[i] - oldPhase[i];
output[i] *= std::complex<float>(cosf(theta), sinf(theta));
}
}

} // namespace simd_complex_conversions
16 changes: 16 additions & 0 deletions libraries/lib-time-and-pitch/StaffPad/VectorOps.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,22 @@ inline void convertPolarToCartesian(
{
simd_complex_conversions::convert_to_complex_parallel_simd_aligned(srcMag, srcPh, dst, n);
}

inline void calcNorms(const std::complex<float>* src, float* dst, int32_t n)
{
simd_complex_conversions::perform_parallel_simd_aligned(
src, dst, n,
[](const __m128 rp, const __m128 ip, __m128& out)
{ out = simd_complex_conversions::norm(rp, ip); });
}

inline void rotate(
const float* oldPhase, const float* newPhase, std::complex<float>* dst,
int32_t n)
{
simd_complex_conversions::rotate_parallel_simd_aligned(
oldPhase, newPhase, dst, n);
}
#else
inline void calcPhases(const std::complex<float>* src, float* dst, int32_t n)
{
Expand Down

0 comments on commit dc72a7e

Please sign in to comment.