From d45e25d7c8ad3d0917ffdfccaf66c1ebd426b55a Mon Sep 17 00:00:00 2001
From: dijopaul
Date: Mon, 7 Oct 2024 05:15:25 -0700
Subject: [PATCH] Changing name space of optimized ops; Remove unused ops from file

---
 backends/cadence/aot/functions_hifi.yaml   |  14 +--
 backends/cadence/hifi/operators/op_add.cpp | 102 +++------------
 backends/cadence/hifi/operators/op_div.cpp | 114 ++----------------
 backends/cadence/hifi/operators/op_mul.cpp |  76 ++----------
 .../cadence/hifi/operators/op_sigmoid.cpp  |  15 ++-
 backends/cadence/hifi/operators/op_sub.cpp |  98 +++-------------
 backends/cadence/hifi/operators/op_tanh.cpp |  16 ++-
 7 files changed, 88 insertions(+), 347 deletions(-)

diff --git a/backends/cadence/aot/functions_hifi.yaml b/backends/cadence/aot/functions_hifi.yaml
index bab47eccff..3a274adaca 100644
--- a/backends/cadence/aot/functions_hifi.yaml
+++ b/backends/cadence/aot/functions_hifi.yaml
@@ -25,7 +25,7 @@
 - op: add.out
   kernels:
     - arg_meta: null
-      kernel_name: torch::executor::add_out
+      kernel_name: impl::HiFi::add_out
 
 - op: bmm.out
   kernels:
@@ -45,12 +45,12 @@
 - op: div.out
   kernels:
     - arg_meta: null
-      kernel_name: torch::executor::div_out
+      kernel_name: impl::HiFi::div_out
 
 - op: div.out_mode
   kernels:
     - arg_meta: null
-      kernel_name: torch::executor::div_out_mode
+      kernel_name: impl::HiFi::div_out_mode
 
 - op: embedding.out
   kernels:
@@ -65,7 +65,7 @@
 - op: mul.out
   kernels:
     - arg_meta: null
-      kernel_name: torch::executor::mul_out
+      kernel_name: impl::HiFi::mul_out
 
 - op: permute_copy.out
   kernels:
@@ -75,7 +75,7 @@
 - op: sigmoid.out
   kernels:
     - arg_meta: null
-      kernel_name: torch::executor::sigmoid_out
+      kernel_name: impl::HiFi::sigmoid_out
 
 - op: slice_copy.Tensor_out
   kernels:
@@ -90,12 +90,12 @@
 - op: sub.out
   kernels:
     - arg_meta: null
-      kernel_name: torch::executor::sub_out
+      kernel_name: impl::HiFi::sub_out
 
 - op: tanh.out
   kernels:
     - arg_meta: null
-      kernel_name: torch::executor::tanh_out
+      kernel_name: impl::HiFi::tanh_out
 
 - op: view_copy.out
   kernels:
diff --git
a/backends/cadence/hifi/operators/op_add.cpp b/backends/cadence/hifi/operators/op_add.cpp index 38585b4005..883cc74d6c 100644 --- a/backends/cadence/hifi/operators/op_add.cpp +++ b/backends/cadence/hifi/operators/op_add.cpp @@ -14,11 +14,19 @@ #include #include -namespace torch { -namespace executor { +using exec_aten::Scalar; +using exec_aten::ScalarType; +using exec_aten::Tensor; +using executorch::runtime::can_cast; +using executorch::runtime::CppTypeToScalarType; +using executorch::runtime::KernelRuntimeContext; +using torch::executor::Error; + +namespace impl { +namespace HiFi { namespace native { -namespace { +namespace { template < bool can_cast, typename CTYPE_A, @@ -35,7 +43,7 @@ template < struct AddInner { static void run(const Tensor& a, const Tensor& b, CTYPE_IN alpha_val, Tensor& out) { - apply_binary_elementwise_fn( + torch::executor::apply_binary_elementwise_fn( // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue) [alpha_val](const CTYPE_A val_a, const CTYPE_B val_b) { CTYPE_IN a_casted = static_cast(val_a); @@ -89,7 +97,7 @@ Tensor& add_out( ScalarType a_type = a.scalar_type(); ScalarType b_type = b.scalar_type(); - ScalarType alpha_type = utils::get_scalar_dtype(alpha); + ScalarType alpha_type = torch::executor::native::utils::get_scalar_dtype(alpha); ScalarType common_type = promoteTypes(a_type, b_type, /*half_to_float*/ true); ScalarType out_type = out.scalar_type(); @@ -98,7 +106,7 @@ Tensor& add_out( ctx, check_alpha_type(alpha_type, common_type), InvalidArgument, out); float alpha_val; - utils::extract_scalar(alpha, &alpha_val); + torch::executor::native::utils::extract_scalar(alpha, &alpha_val); constexpr auto name = "add.out"; constexpr int kNnlibMaxDim = 4; /*fallback if broadcast and dim > 4 */ @@ -168,7 +176,7 @@ Tensor& add_out( promote_types::type; ET_DCHECK(CppTypeToScalarType::value == common_type); CTYPE_IN alpha_val; - utils::extract_scalar(alpha, &alpha_val); + torch::executor::native::utils::extract_scalar(alpha, 
&alpha_val); ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, name, CTYPE_OUT, [&]() { AddInner< @@ -184,83 +192,7 @@ Tensor& add_out( return out; } -Tensor& add_scalar_out( - KernelRuntimeContext& ctx, - const Tensor& a, - const Scalar& b, - const Scalar& alpha, - Tensor& out) { - - // Resize for dynamic shape - ET_KERNEL_CHECK_MSG( - ctx, - resize_tensor(out, a.sizes()) == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - - ET_KERNEL_CHECK( - ctx, - executorch::runtime::tensor_is_realhbbf16_type(out), - InvalidArgument, - out); - ET_KERNEL_CHECK( - ctx, tensors_have_same_dim_order(a, out), InvalidArgument, out); - - ScalarType a_type = a.scalar_type(); - ScalarType b_type = utils::get_scalar_dtype(b); - ScalarType alpha_type = utils::get_scalar_dtype(alpha); - ScalarType common_type = - utils::promote_type_with_scalar(a_type, b, /*half_to_float*/ false); - ScalarType out_type = out.scalar_type(); - - ET_KERNEL_CHECK(ctx, common_type == out_type, InvalidArgument, out); - ET_KERNEL_CHECK( - ctx, check_alpha_type(alpha_type, common_type), InvalidArgument, out); - - /*When Half first compute the result in float precision - and then downcast to half*/ - if (common_type == ScalarType::Half) { - common_type = ScalarType::Float; - } - - constexpr auto name = "add.Scalar_out"; - - ET_SWITCH_REALHBBF16_TYPES(a_type, ctx, name, CTYPE_A, [&]() { - ET_SWITCH_SCALAR_OBJ_TYPES(b_type, ctx, name, CTYPE_B, [&]() { - using CTYPE_IN = typename utils::promote_type_with_scalar_type< - CTYPE_A, - CTYPE_B, - /*half_to_float*/ true>::type; - ET_DCHECK(CppTypeToScalarType::value == common_type); - - CTYPE_B b_val; - utils::extract_scalar(b, &b_val); - CTYPE_IN b_casted = static_cast(b_val); - - CTYPE_IN alpha_val; - utils::extract_scalar(alpha, &alpha_val); - - using CTYPE_OUT = typename std::conditional< - std::is_same::value, - internal::F2, - CTYPE_IN>::type; - - apply_unary_map_fn( - [b_casted, alpha_val](const CTYPE_A val_a) { - CTYPE_IN a_casted = 
static_cast(val_a); - CTYPE_IN value = a_casted + alpha_val * b_casted; - return static_cast(value); - }, - a.const_data_ptr(), - out.mutable_data_ptr(), - out.numel()); - }); - }); - - return out; -} +} // namespace impl +} // namespace HiFi } // namespace native -} // namespace executor -} // namespace torch diff --git a/backends/cadence/hifi/operators/op_div.cpp b/backends/cadence/hifi/operators/op_div.cpp index 057132e7bc..41220e5d0b 100644 --- a/backends/cadence/hifi/operators/op_div.cpp +++ b/backends/cadence/hifi/operators/op_div.cpp @@ -15,8 +15,14 @@ #include #include -namespace torch { -namespace executor { +using exec_aten::Scalar; +using exec_aten::ScalarType; +using exec_aten::Tensor; +using executorch::aten::RuntimeContext; +using torch::executor::Error; + +namespace impl { +namespace HiFi { namespace native { namespace { @@ -127,7 +133,7 @@ div_out(RuntimeContext& ctx, const Tensor& a, const Tensor& b, Tensor& out) { ET_SWITCH_REAL_TYPES_AND(Bool, b_type, ctx, "div.out", CTYPE_B, [&]() { ET_SWITCH_FLOAT_TYPES(common_type, ctx, "div.out", CTYPE_IN, [&]() { ET_SWITCH_FLOAT_TYPES(out_type, ctx, "div.out", CTYPE_OUT, [&]() { - apply_binary_elementwise_fn( + torch::executor::apply_binary_elementwise_fn( [](const CTYPE_A val_a, const CTYPE_B val_b) { CTYPE_IN a_casted = static_cast(val_a); CTYPE_IN b_casted = static_cast(val_b); @@ -242,7 +248,7 @@ Tensor& div_out_mode( ET_SWITCH_REAL_TYPES_AND(Bool, b_type, ctx, "div.out_mode", CTYPE_B, [&]() { ET_SWITCH_FLOAT_TYPES(common_type, ctx, "div.out_mode", CTYPE_IN, [&]() { ET_SWITCH_REAL_TYPES(out_type, ctx, "div.out_mode", CTYPE_OUT, [&]() { - apply_binary_elementwise_fn( + torch::executor::apply_binary_elementwise_fn( [mode](const CTYPE_A val_a, const CTYPE_B val_b) { CTYPE_IN a_casted = static_cast(val_a); CTYPE_IN b_casted = static_cast(val_b); @@ -265,103 +271,7 @@ Tensor& div_out_mode( return out; } -Tensor& div_scalar_out( - RuntimeContext& ctx, - const Tensor& a, - const Scalar& b, - Tensor& out) { - 
(void)ctx; - - // Resize for dynamic shape - ET_KERNEL_CHECK_MSG( - ctx, - resize_tensor(out, a.sizes()) == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - - ScalarType a_type = a.scalar_type(); - ScalarType b_type = utils::get_scalar_dtype(b); - ScalarType common_type = isFloatingType(a_type) ? a_type : ScalarType::Float; - ScalarType out_type = out.scalar_type(); - - ET_KERNEL_CHECK(ctx, common_type == out_type, InvalidArgument, out); - - ET_SWITCH_REAL_TYPES_AND(Bool, a_type, ctx, "div.Scalar_out", CTYPE_A, [&]() { - ET_SWITCH_SCALAR_OBJ_TYPES(b_type, ctx, "div.Scalar_out", CTYPE_B, [&]() { - ET_SWITCH_FLOAT_TYPES(out_type, ctx, "div.Scalar_out", CTYPE, [&]() { - CTYPE_B b_val; - utils::extract_scalar(b, &b_val); - CTYPE b_casted = static_cast(b_val); - - apply_unary_map_fn( - [b_casted](const CTYPE_A val_a) { - CTYPE a_casted = static_cast(val_a); - CTYPE value = a_casted / b_casted; - return static_cast(value); - }, - a.const_data_ptr(), - out.mutable_data_ptr(), - out.numel()); - }); - }); - }); - - return out; -} - -Tensor& div_scalar_mode_out( - RuntimeContext& ctx, - const Tensor& a, - const Scalar& b, - exec_aten::optional mode, - Tensor& out) { - - // Resize for dynamic shape - ET_KERNEL_CHECK_MSG( - ctx, - resize_tensor(out, a.sizes()) == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - - ScalarType a_type = a.scalar_type(); - ScalarType b_type = utils::get_scalar_dtype(b); - ScalarType common_type = utils::promote_type_with_scalar(a_type, b); - ScalarType out_type = out.scalar_type(); - - ET_KERNEL_CHECK(ctx, common_type == out_type, InvalidArgument, out); - - constexpr auto name = "div.Scalar_mode_out"; - - ET_SWITCH_REALB_TYPES(a_type, ctx, name, CTYPE_A, [&]() { - ET_SWITCH_SCALAR_OBJ_TYPES(b_type, ctx, name, CTYPE_B, [&]() { - ET_SWITCH_REAL_TYPES(out_type, ctx, name, CTYPE, [&]() { - CTYPE_B b_val; - utils::extract_scalar(b, &b_val); - CTYPE b_casted = static_cast(b_val); - - 
apply_unary_map_fn( - [b_casted, mode](const CTYPE_A val_a) { - CTYPE a_casted = static_cast(val_a); - CTYPE value = a_casted / b_casted; - if (mode.has_value() && mode.value() == "trunc") { - value = std::trunc(value); - } else if (mode.has_value() && mode.value() == "floor") { - value = utils::floor_divide(a_casted, b_casted); - } - return value; - }, - a.const_data_ptr(), - out.mutable_data_ptr(), - out.numel()); - }); - }); - }); - - return out; -} +} // namespace impl +} // namespace HiFi } // namespace native -} // namespace executor -} // namespace torch diff --git a/backends/cadence/hifi/operators/op_mul.cpp b/backends/cadence/hifi/operators/op_mul.cpp index 05173e831c..9200d9802e 100644 --- a/backends/cadence/hifi/operators/op_mul.cpp +++ b/backends/cadence/hifi/operators/op_mul.cpp @@ -13,9 +13,16 @@ #include #include - -namespace torch { -namespace executor { +using exec_aten::Scalar; +using exec_aten::ScalarType; +using exec_aten::Tensor; +using executorch::aten::RuntimeContext; +using executorch::runtime::can_cast; +using executorch::runtime::CppTypeToScalarType; +using torch::executor::Error; + +namespace impl { +namespace HiFi { namespace native { namespace { @@ -34,7 +41,7 @@ template < typename CTYPE_OUT> struct MulInner { static void run(const Tensor& a, const Tensor& b, Tensor& out) { - apply_binary_elementwise_fn( + torch::executor::apply_binary_elementwise_fn( // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue) [](const CTYPE_A val_a, const CTYPE_B val_b) { CTYPE_IN a_casted = static_cast(val_a); @@ -155,63 +162,6 @@ mul_out(RuntimeContext& ctx, const Tensor& a, const Tensor& b, Tensor& out) { return out; } -Tensor& mul_scalar_out( - RuntimeContext& ctx, - const Tensor& a, - const Scalar& b, - Tensor& out) { - - // Resize for dynamic shape - ET_KERNEL_CHECK_MSG( - ctx, - resize_tensor(out, a.sizes()) == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - - ET_KERNEL_CHECK(ctx, tensor_is_realhb_type(out), 
InvalidArgument, out); - - ScalarType a_type = a.scalar_type(); - ScalarType b_type = utils::get_scalar_dtype(b); - ScalarType common_type = - utils::promote_type_with_scalar(a_type, b, /*half_to_float*/ false); - ScalarType out_type = out.scalar_type(); - - ET_KERNEL_CHECK(ctx, common_type == out_type, InvalidArgument, out); - - /*When Half first compute the result in float precision - and then downcast to half*/ - if (common_type == ScalarType::Half) { - common_type = ScalarType::Float; - } - - ET_SWITCH_REALHB_TYPES(a_type, ctx, "mul.Scalar_out", CTYPE_A, [&]() { - ET_SWITCH_SCALAR_OBJ_TYPES(b_type, ctx, "mul.Scalar_out", CTYPE_B, [&]() { - ET_SWITCH_REALB_TYPES( - common_type, ctx, "mul.Scalar_out", CTYPE_IN, [&]() { - ET_SWITCH_REALHB_TYPES( - out_type, ctx, "mul.Scalar_out", CTYPE_OUT, [&]() { - CTYPE_B b_val; - utils::extract_scalar(b, &b_val); - CTYPE_IN b_casted = static_cast(b_val); - - apply_unary_map_fn( - [b_casted](const CTYPE_A val_a) { - CTYPE_IN a_casted = static_cast(val_a); - CTYPE_IN value = a_casted * b_casted; - return static_cast(value); - }, - a.const_data_ptr(), - out.mutable_data_ptr(), - out.numel()); - }); - }); - }); - }); - - return out; -} - +} // namespace impl +} // namespace HiFi } // namespace native -} // namespace executor -} // namespace torch diff --git a/backends/cadence/hifi/operators/op_sigmoid.cpp b/backends/cadence/hifi/operators/op_sigmoid.cpp index 6c54e053f9..fa408d4b0a 100644 --- a/backends/cadence/hifi/operators/op_sigmoid.cpp +++ b/backends/cadence/hifi/operators/op_sigmoid.cpp @@ -12,8 +12,13 @@ #include #include -namespace torch { -namespace executor { +using exec_aten::ScalarType; +using exec_aten::Tensor; +using executorch::aten::RuntimeContext; +using torch::executor::Error; + +namespace impl { +namespace HiFi { namespace native { using Tensor = exec_aten::Tensor; @@ -51,7 +56,7 @@ Tensor& sigmoid_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) { ET_SWITCH_REALHB_TYPES(in_type, ctx, "sigmoid.out", 
CTYPE_IN, [&]() { ET_SWITCH_FLOATH_TYPES(out_type, ctx, "sigmoid.out", CTYPE_OUT, [&]() { - apply_unary_map_fn( + torch::executor::apply_unary_map_fn( [](const CTYPE_IN val_in) { // perform math in double to preserve precision double in_casted = static_cast(val_in); @@ -67,6 +72,6 @@ Tensor& sigmoid_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) { return out; } +} // namespace impl +} // namespace HiFi } // namespace native -} // namespace executor -} // namespace torch diff --git a/backends/cadence/hifi/operators/op_sub.cpp b/backends/cadence/hifi/operators/op_sub.cpp index d297bc0c69..b9f35cafdd 100644 --- a/backends/cadence/hifi/operators/op_sub.cpp +++ b/backends/cadence/hifi/operators/op_sub.cpp @@ -14,11 +14,20 @@ #include #include -namespace torch { -namespace executor { +using exec_aten::Scalar; +using exec_aten::ScalarType; +using exec_aten::Tensor; +using executorch::runtime::can_cast; +using executorch::runtime::CppTypeToScalarType; +using executorch::aten::RuntimeContext; +using torch::executor::Error; + + +namespace impl { +namespace HiFi { namespace native { -namespace { +namespace { template < bool can_cast, typename CTYPE_A, @@ -35,7 +44,7 @@ template < struct SubInner { static void run(const Tensor& a, const Tensor& b, CTYPE_IN alpha_val, Tensor& out) { - apply_binary_elementwise_fn( + torch::executor::apply_binary_elementwise_fn( // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue) [alpha_val](const CTYPE_A val_a, const CTYPE_B val_b) { CTYPE_IN a_casted = static_cast(val_a); @@ -83,7 +92,7 @@ Tensor& sub_out( ScalarType a_type = a.scalar_type(); ScalarType b_type = b.scalar_type(); - ScalarType alpha_type = utils::get_scalar_dtype(alpha); + ScalarType alpha_type = torch::executor::native::utils::get_scalar_dtype(alpha); ScalarType common_type = promoteTypes(a_type, b_type, /*half_to_float*/ true); ScalarType out_type = out.scalar_type(); @@ -92,7 +101,7 @@ Tensor& sub_out( ctx, check_alpha_type(alpha_type, common_type), 
InvalidArgument, out); float alpha_val; - utils::extract_scalar(alpha, &alpha_val); + torch::executor::native::utils::extract_scalar(alpha, &alpha_val); constexpr auto name = "sub.out"; constexpr int kNnlibMaxDim = 4; /*fallback if broadcast and dim > 4 */ @@ -166,7 +175,7 @@ Tensor& sub_out( promote_types::type; ET_DCHECK(CppTypeToScalarType::value == common_type); CTYPE_IN alpha_val; - utils::extract_scalar(alpha, &alpha_val); + torch::executor::native::utils::extract_scalar(alpha, &alpha_val); ET_SWITCH_REALH_TYPES(out_type, ctx, name, CTYPE_OUT, [&]() { SubInner< can_cast::value, @@ -181,77 +190,6 @@ Tensor& sub_out( return out; } -Tensor& sub_scalar_out( - RuntimeContext& ctx, - const Tensor& a, - const Scalar& b, - const Scalar& alpha, - Tensor& out) { - (void)ctx; - - // Resize for dynamic shape - ET_KERNEL_CHECK_MSG( - ctx, - resize_tensor(out, a.sizes()) == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - - ET_KERNEL_CHECK(ctx, tensor_is_realh_type(out), InvalidArgument, out); - - ScalarType a_type = a.scalar_type(); - ScalarType b_type = utils::get_scalar_dtype(b); - ScalarType alpha_type = utils::get_scalar_dtype(alpha); - ScalarType common_type = - utils::promote_type_with_scalar(a_type, b, /*half_to_float*/ false); - ScalarType out_type = out.scalar_type(); - - ET_KERNEL_CHECK(ctx, common_type == out_type, InvalidArgument, out); - ET_KERNEL_CHECK(ctx, canCast(alpha_type, common_type), InvalidArgument, out); - - /*When Half first compute the result in float precision - and then downcast to half*/ - if (common_type == ScalarType::Half) { - common_type = ScalarType::Float; - } - - constexpr auto name = "sub.Scalar_out"; - - ET_SWITCH_REALH_TYPES(a_type, ctx, name, CTYPE_A, [&]() { - ET_SWITCH_SCALAR_OBJ_REAL_TYPES(b_type, ctx, name, CTYPE_B, [&]() { - using CTYPE_IN = typename utils::promote_type_with_scalar_type< - CTYPE_A, - CTYPE_B, - /*half_to_float*/ true>::type; - ET_DCHECK(CppTypeToScalarType::value == common_type); - - 
CTYPE_B b_val; - utils::extract_scalar(b, &b_val); - CTYPE_IN b_casted = static_cast(b_val); - - CTYPE_IN alpha_val; - utils::extract_scalar(alpha, &alpha_val); - - using CTYPE_OUT = typename std::conditional< - std::is_same::value, - internal::F2, - CTYPE_IN>::type; - - apply_unary_map_fn( - [b_casted, alpha_val](const CTYPE_A val_a) { - CTYPE_IN a_casted = static_cast(val_a); - CTYPE_IN value = a_casted - alpha_val * b_casted; - return static_cast(value); - }, - a.const_data_ptr(), - out.mutable_data_ptr(), - out.numel()); - }); - }); - - return out; -} - +} // namespace impl +} // namespace HiFi } // namespace native -} // namespace executor -} // namespace torch diff --git a/backends/cadence/hifi/operators/op_tanh.cpp b/backends/cadence/hifi/operators/op_tanh.cpp index f06b57a368..a80450b8d8 100644 --- a/backends/cadence/hifi/operators/op_tanh.cpp +++ b/backends/cadence/hifi/operators/op_tanh.cpp @@ -11,10 +11,16 @@ #include #include -namespace torch { -namespace executor { +using exec_aten::ScalarType; +using exec_aten::Tensor; +using executorch::aten::RuntimeContext; +using torch::executor::Error; + +namespace impl { +namespace HiFi { namespace native { + Tensor& tanh_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) { bool optimized = 1; @@ -29,10 +35,10 @@ Tensor& tanh_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) { return out; } - return internal::unary_ufunc_realhb_to_floath(std::tanh, ctx, in, out); + return torch::executor::native::internal::unary_ufunc_realhb_to_floath(std::tanh, ctx, in, out); } +} // namespace impl +} // namespace HiFi } // namespace native -} // namespace executor -} // namespace torch