Skip to content

Commit

Permalink
Added avx difference u8
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Sep 26, 2024
1 parent d3b55b2 commit b401cd9
Show file tree
Hide file tree
Showing 7 changed files with 175 additions and 12 deletions.
11 changes: 0 additions & 11 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion app/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ edition = "2021"
image = "0.25.2"
fast_morphology = {path = "../", features = ["image"]}
imageproc = "0.25.0"
opencv = {version = "0.93.0", features = ["imgproc", "clang-runtime"]}
opencv = {version = "0.93.0", features = ["imgproc"]}

[dev-dependencies]
criterion = {version = "0.5.1", features = ["html_reports"]}
Expand Down
131 changes: 131 additions & 0 deletions src/avx/gradient_unsigned_8.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Copyright (c) Radzivon Bartoshyk. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
/// Computes the morphological gradient of two `u8` buffers, writing
/// `dilation[i].saturating_sub(erosion[i])` into `dst[i]`.
///
/// The AVX2 kernel is used when the running CPU supports it. Because this
/// function is safe to call, AVX2 availability is verified here rather than
/// trusted from the caller: executing `#[target_feature(enable = "avx2")]`
/// code on a CPU without AVX2 is undefined behaviour. When AVX2 is missing a
/// scalar loop with identical results is used instead.
///
/// # Panics
///
/// Panics if `dilation`, `erosion` and `dst` do not all have the same length.
pub fn morph_gradient_avx(dilation: &[u8], erosion: &[u8], dst: &mut [u8]) {
    if std::arch::is_x86_feature_detected!("avx2") {
        // SAFETY: AVX2 support was verified on the line above, which is the
        // only requirement `morph_gradient_avx_impl` places on its caller.
        unsafe {
            morph_gradient_avx_impl(dilation, erosion, dst);
        }
        return;
    }

    // Scalar fallback: same length contract and same panic message as the
    // AVX2 kernel, so behaviour does not depend on the CPU.
    if dilation.len() != erosion.len() || erosion.len() != dst.len() {
        panic!(
            "All array must match in size for gradient but received v0: {}, v1: {}, v2: {}",
            dilation.len(),
            erosion.len(),
            dst.len()
        );
    }
    for (out, (&dilated, &eroded)) in dst.iter_mut().zip(dilation.iter().zip(erosion.iter())) {
        *out = dilated.saturating_sub(eroded);
    }
}
/// AVX2 kernel for the `u8` morphological gradient:
/// `dst[i] = dilation[i].saturating_sub(erosion[i])` for every index.
///
/// The input is consumed 128 bytes per iteration (four 256-bit vectors),
/// followed by at most one 64-byte, one 32-byte and one 16-byte step, and a
/// scalar loop for the final `< 16` bytes.
///
/// # Panics
///
/// Panics if `dilation`, `erosion` and `dst` do not all have the same length.
///
/// # Safety
///
/// The caller must ensure that the executing CPU supports AVX2.
#[inline]
#[target_feature(enable = "avx2")]
unsafe fn morph_gradient_avx_impl(dilation: &[u8], erosion: &[u8], dst: &mut [u8]) {
    if dilation.len() != erosion.len() || erosion.len() != dst.len() {
        panic!(
            "All array must match in size for gradient but received v0: {}, v1: {}, v2: {}",
            dilation.len(),
            erosion.len(),
            dst.len()
        );
    }

    let length = dilation.len();
    let mut cx = 0usize;

    {
        let hi_base = dilation.as_ptr();
        let lo_base = erosion.as_ptr();
        let out_base = dst.as_mut_ptr();

        // SAFETY: all three slices hold exactly `length` bytes (checked above).
        // Every vector access of width W at offset `cx` is guarded by
        // `cx + W <= length`, so it stays inside its slice. The `loadu`/`storeu`
        // intrinsics have no alignment requirement, and `dst` is an exclusive
        // borrow, so the stores cannot overlap either source.
        unsafe {
            // `<=` (not `<`) so that a final chunk of exactly 128 bytes is still
            // handled by the widest path instead of falling through.
            while cx + 128 <= length {
                let hi = hi_base.add(cx);
                let lo = lo_base.add(cx);
                let out = out_base.add(cx);

                let hi0 = _mm256_loadu_si256(hi as *const __m256i);
                let hi1 = _mm256_loadu_si256(hi.add(32) as *const __m256i);
                let hi2 = _mm256_loadu_si256(hi.add(64) as *const __m256i);
                let hi3 = _mm256_loadu_si256(hi.add(96) as *const __m256i);

                let lo0 = _mm256_loadu_si256(lo as *const __m256i);
                let lo1 = _mm256_loadu_si256(lo.add(32) as *const __m256i);
                let lo2 = _mm256_loadu_si256(lo.add(64) as *const __m256i);
                let lo3 = _mm256_loadu_si256(lo.add(96) as *const __m256i);

                _mm256_storeu_si256(out as *mut __m256i, _mm256_subs_epu8(hi0, lo0));
                _mm256_storeu_si256(out.add(32) as *mut __m256i, _mm256_subs_epu8(hi1, lo1));
                _mm256_storeu_si256(out.add(64) as *mut __m256i, _mm256_subs_epu8(hi2, lo2));
                _mm256_storeu_si256(out.add(96) as *mut __m256i, _mm256_subs_epu8(hi3, lo3));

                cx += 128;
            }

            // Fewer than 128 bytes remain, so each narrower step below can
            // apply at most once.
            if cx + 64 <= length {
                let hi = hi_base.add(cx);
                let lo = lo_base.add(cx);
                let out = out_base.add(cx);

                let hi0 = _mm256_loadu_si256(hi as *const __m256i);
                let hi1 = _mm256_loadu_si256(hi.add(32) as *const __m256i);
                let lo0 = _mm256_loadu_si256(lo as *const __m256i);
                let lo1 = _mm256_loadu_si256(lo.add(32) as *const __m256i);

                _mm256_storeu_si256(out as *mut __m256i, _mm256_subs_epu8(hi0, lo0));
                _mm256_storeu_si256(out.add(32) as *mut __m256i, _mm256_subs_epu8(hi1, lo1));

                cx += 64;
            }

            if cx + 32 <= length {
                let hi = _mm256_loadu_si256(hi_base.add(cx) as *const __m256i);
                let lo = _mm256_loadu_si256(lo_base.add(cx) as *const __m256i);
                _mm256_storeu_si256(
                    out_base.add(cx) as *mut __m256i,
                    _mm256_subs_epu8(hi, lo),
                );
                cx += 32;
            }

            if cx + 16 <= length {
                let hi = _mm_loadu_si128(hi_base.add(cx) as *const __m128i);
                let lo = _mm_loadu_si128(lo_base.add(cx) as *const __m128i);
                _mm_storeu_si128(out_base.add(cx) as *mut __m128i, _mm_subs_epu8(hi, lo));
                cx += 16;
            }
        }
    }

    // Scalar tail for the remaining `< 16` bytes; `cx <= length` holds here, so
    // the sub-slicing cannot panic.
    for (out, (&dilated, &eroded)) in dst[cx..]
        .iter_mut()
        .zip(dilation[cx..].iter().zip(erosion[cx..].iter()))
    {
        *out = dilated.saturating_sub(eroded);
    }
}
32 changes: 32 additions & 0 deletions src/avx/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Copyright (c) Radzivon Bartoshyk. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

mod gradient_unsigned_8;

pub use gradient_unsigned_8::morph_gradient_avx;
5 changes: 5 additions & 0 deletions src/difference.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use crate::avx::morph_gradient_avx;
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
use crate::neon::morph_gradient_neon;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Expand Down Expand Up @@ -67,6 +69,9 @@ impl MorphGradient<u8> for u8 {
if std::arch::is_x86_feature_detected!("sse4.1") {
_dispatcher = morph_gradient_sse;
}
if std::arch::is_x86_feature_detected!("avx2") {
_dispatcher = morph_gradient_avx;
}
}
_dispatcher(dilation, erosion, dst)
}
Expand Down
3 changes: 3 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ extern crate core;

mod arena;
mod arena_roi;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod avx;
mod border_mode;
mod difference;
#[cfg(feature = "image")]
Expand All @@ -43,6 +45,7 @@ mod morph_base;
mod morph_gray_alpha;
mod morph_rgb;
mod morph_rgba;
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
mod neon;
mod op;
mod op_f32;
Expand Down
3 changes: 3 additions & 0 deletions src/op.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use std::time::Instant;
use crate::border_mode::BorderMode;
use crate::difference::MorphGradient;
use crate::morph_gray_alpha::make_morphology_gray_alpha;
Expand Down Expand Up @@ -554,7 +555,9 @@ pub fn morphology_rgb(
border_mode,
threading_policy,
)?;
let start_time = Instant::now();
u8::morph_gradient(&dilation, &erosion, dst);
println!("end time {:?}", start_time.elapsed());
Ok(())
}
MorphExOp::TopHat => {
Expand Down

0 comments on commit b401cd9

Please sign in to comment.