From 6ac5f51bf0c5257ab1c727d80c05595d666afbc2 Mon Sep 17 00:00:00 2001
From: Joe Birr-Pixton <jpixton@gmail.com>
Date: Sun, 27 Oct 2024 18:42:11 +0000
Subject: [PATCH] try prefetch

---
 graviola/src/low/aarch64/aes.rs    |  3 +++
 graviola/src/low/aarch64/cpu.rs    | 24 ++++++++++++++++++++++++
 graviola/src/low/aarch64/sha256.rs | 11 ++---------
 3 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/graviola/src/low/aarch64/aes.rs b/graviola/src/low/aarch64/aes.rs
index 433f8128..5eddade9 100644
--- a/graviola/src/low/aarch64/aes.rs
+++ b/graviola/src/low/aarch64/aes.rs
@@ -9,6 +9,7 @@
 // cf. the x86_64 version, on which this one is based.
 
 use crate::low;
+use crate::low::aarch64::cpu;
 use core::arch::aarch64::*;
 
 pub(crate) enum AesKey {
@@ -55,6 +56,8 @@ impl AesKey {
         let mut by8 = cipher_inout.chunks_exact_mut(128);
 
         for cipher8 in by8.by_ref() {
+            cpu::prefetch_rw(cipher8.as_ptr());
+            cpu::prefetch_rw(cipher8.as_ptr().add(128));
             counter = vaddq_u32(counter, inc);
             let b0 = vrev32q_u8(vreinterpretq_u8_u32(counter));
             counter = vaddq_u32(counter, inc);
diff --git a/graviola/src/low/aarch64/cpu.rs b/graviola/src/low/aarch64/cpu.rs
index 88abbf3e..93e69020 100644
--- a/graviola/src/low/aarch64/cpu.rs
+++ b/graviola/src/low/aarch64/cpu.rs
@@ -113,3 +113,27 @@ mod dit {
         }
     }
 }
+
+/// Read-only prefetch hint.
+pub(in crate::low) fn prefetch_ro<T>(ptr: *const T) {
+    // SAFETY: inline assembly
+    unsafe {
+        core::arch::asm!(
+            "prfm pldl1strm, [{ptr}]",
+            ptr = in(reg) ptr,
+            options(readonly, nostack)
+        );
+    }
+}
+
+/// Read-write prefetch hint.
+pub(in crate::low) fn prefetch_rw<T>(ptr: *const T) {
+    // SAFETY: inline assembly
+    unsafe {
+        core::arch::asm!(
+            "prfm pstl1keep, [{ptr}]",
+            ptr = in(reg) ptr,
+            options(readonly, nostack)
+        );
+    }
+}
diff --git a/graviola/src/low/aarch64/sha256.rs b/graviola/src/low/aarch64/sha256.rs
index 7fb9966f..b3ad2bfb 100644
--- a/graviola/src/low/aarch64/sha256.rs
+++ b/graviola/src/low/aarch64/sha256.rs
@@ -1,6 +1,7 @@
 // Written for Graviola by Joe Birr-Pixton, 2024.
 // SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0
 
+use crate::low::aarch64::cpu;
 use core::arch::aarch64::*;
 
 pub(crate) fn sha256_compress_blocks(state: &mut [u32; 8], blocks: &[u8]) {
@@ -60,7 +61,7 @@ unsafe fn sha256(state: &mut [u32; 8], blocks: &[u8]) {
         let state1_prev = state1;
 
         // prefetch next block
-        prefetch(block.as_ptr().add(64));
+        cpu::prefetch_ro(block.as_ptr().add(64));
 
         let msg0 = vld1q_u32(block[0..].as_ptr() as *const _);
         let msg1 = vld1q_u32(block[16..].as_ptr() as *const _);
@@ -100,14 +101,6 @@ unsafe fn sha256(state: &mut [u32; 8], blocks: &[u8]) {
     vst1q_u32(state[4..8].as_mut_ptr(), state1);
 }
 
-unsafe fn prefetch<T>(ptr: *const T) {
-    core::arch::asm!(
-        "prfm pldl1strm, [{ptr}]",
-        ptr = in(reg) ptr,
-        options(readonly, nostack)
-    );
-}
-
 #[repr(align(16))]
 struct Aligned([u32; 64]);
 