Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add Spark array_append function #12043

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions velox/functions/lib/ArrayFunctions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include "velox/functions/Macros.h"

namespace facebook::velox::functions {

/// Simple function that builds a new array by concatenating arrays, or by
/// attaching a single element to either end of an array.
///
/// DEFINITION:
/// concat(array1, array2, ..., arrayN) → array
/// Concatenates the arrays array1, array2, ..., arrayN. This function
/// provides the same functionality as the SQL-standard concatenation
/// operator (||).
///
/// Two further overloads take (array, element) and (element, array); they
/// copy the array and place the element after or before its items,
/// respectively.
///
/// Note:
/// - For compatibility with Presto a maximum arity of 254 is enforced on the
///   variadic overload.
template <typename TExec, typename T>
struct ArrayConcatFunction {
  VELOX_DEFINE_FUNCTION_TYPES(TExec)

  /// Inclusive bounds on the number of arrays accepted by the variadic
  /// overload.
  static constexpr int32_t kMinArity = 2;
  static constexpr int32_t kMaxArity = 254;

  /// Variadic overload: writes the items of every input array to 'out', in
  /// argument order. Raises a user error when the number of arguments is
  /// outside [kMinArity, kMaxArity].
  void call(
      out_type<Array<T>>& out,
      const arg_type<Variadic<Array<T>>>& arrays) {
    VELOX_USER_CHECK_GE(
        arrays.size(),
        kMinArity,
        "There must be {} or more arguments to concat",
        kMinArity);
    VELOX_USER_CHECK_LE(
        arrays.size(), kMaxArity, "Too many arguments for concat function");

    // First pass: total the input sizes so the output is reserved only once.
    int64_t totalSize = 0;
    for (const auto& input : arrays) {
      totalSize += input.value().size();
    }
    out.reserve(totalSize);

    // Second pass: copy the items over.
    for (const auto& input : arrays) {
      out.add_items(input.value());
    }
  }

  /// (array, element) overload: copies 'array' and then adds 'element' as the
  /// last item.
  void call(
      out_type<Array<T>>& out,
      const arg_type<Array<T>>& array,
      const arg_type<T>& element) {
    out.reserve(array.size() + 1);
    out.add_items(array);
    out.push_back(element);
  }

  /// (element, array) overload: adds 'element' as the first item and then
  /// copies 'array' after it.
  void call(
      out_type<Array<T>>& out,
      const arg_type<T>& element,
      const arg_type<Array<T>>& array) {
    out.reserve(array.size() + 1);
    out.push_back(element);
    out.add_items(array);
  }
};

} // namespace facebook::velox::functions
1 change: 1 addition & 0 deletions velox/functions/lib/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.
add_executable(
velox_functions_lib_test
ArrayConcatTest.cpp
ApproxMostFrequentStreamSummaryTest.cpp
CheckNestedNullsTest.cpp
DateTimeFormatterTest.cpp
Expand Down
56 changes: 0 additions & 56 deletions velox/functions/prestosql/ArrayFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -660,62 +660,6 @@ struct ArrayNormalizeFunction {
}
};

/// Simple function that builds a new array by concatenating arrays, or by
/// attaching a single element to either end of an array.
///
/// DEFINITION:
/// concat(array1, array2, ..., arrayN) → array
/// Concatenates the arrays array1, array2, ..., arrayN. This function
/// provides the same functionality as the SQL-standard concatenation
/// operator (||).
///
/// Two further overloads take (array, element) and (element, array); they
/// copy the array and place the element after or before its items,
/// respectively.
///
/// Note:
/// - For compatibility with Presto a maximum arity of 254 is enforced on the
///   variadic overload.
template <typename TExec, typename T>
struct ArrayConcatFunction {
  VELOX_DEFINE_FUNCTION_TYPES(TExec)

  /// Inclusive bounds on the number of arrays accepted by the variadic
  /// overload.
  static constexpr int32_t kMinArity = 2;
  static constexpr int32_t kMaxArity = 254;

  /// Variadic overload: writes the items of every input array to 'out', in
  /// argument order. Raises a user error when the number of arguments is
  /// outside [kMinArity, kMaxArity].
  void call(
      out_type<Array<T>>& out,
      const arg_type<Variadic<Array<T>>>& arrays) {
    VELOX_USER_CHECK_GE(
        arrays.size(),
        kMinArity,
        "There must be {} or more arguments to concat",
        kMinArity);
    VELOX_USER_CHECK_LE(
        arrays.size(), kMaxArity, "Too many arguments for concat function");

    // First pass: total the input sizes so the output is reserved only once.
    int64_t totalSize = 0;
    for (const auto& input : arrays) {
      totalSize += input.value().size();
    }
    out.reserve(totalSize);

    // Second pass: copy the items over.
    for (const auto& input : arrays) {
      out.add_items(input.value());
    }
  }

  /// (array, element) overload: copies 'array' and then adds 'element' as the
  /// last item.
  void call(
      out_type<Array<T>>& out,
      const arg_type<Array<T>>& array,
      const arg_type<T>& element) {
    out.reserve(array.size() + 1);
    out.add_items(array);
    out.push_back(element);
  }

  /// (element, array) overload: adds 'element' as the first item and then
  /// copies 'array' after it.
  void call(
      out_type<Array<T>>& out,
      const arg_type<T>& element,
      const arg_type<Array<T>>& array) {
    out.reserve(array.size() + 1);
    out.push_back(element);
    out.add_items(array);
  }
};

inline void checkIndexArrayTrim(int64_t size, int64_t arraySize) {
if (size < 0) {
VELOX_USER_FAIL("size must not be negative: {}", size);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/

#include "velox/functions/Registerer.h"
#include "velox/functions/prestosql/ArrayFunctions.h"
#include "velox/functions/lib/ArrayFunctions.h"

namespace facebook::velox::functions {
namespace {
Expand Down
1 change: 0 additions & 1 deletion velox/functions/prestosql/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ add_executable(
ArrayAnyMatchTest.cpp
ArrayAverageTest.cpp
ArrayCombinationsTest.cpp
ArrayConcatTest.cpp
ArrayConstructorTest.cpp
ArrayContainsTest.cpp
ArrayCumSumTest.cpp
Expand Down
27 changes: 27 additions & 0 deletions velox/functions/sparksql/registration/RegisterArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "velox/functions/lib/ArrayFunctions.h"
#include "velox/functions/lib/ArrayShuffle.h"
#include "velox/functions/lib/RegistrationHelpers.h"
#include "velox/functions/lib/Repeat.h"
Expand Down Expand Up @@ -108,7 +109,33 @@ inline void registerArrayRemoveFunctions(const std::string& prefix) {
Varchar>({prefix + "array_remove"});
}

template <typename T>
inline void registerArrayAppendFunctions(const std::string& prefix) {
registerFunction<
ParameterBinder<ArrayConcatFunction, T>,
Array<T>,
Array<T>,
T>({prefix + "array_append"});
}

void registerArrayAppendFunctions(const std::string& prefix) {
registerArrayAppendFunctions<Generic<T1>>(prefix);
// Fast paths for primitives types.
registerArrayAppendFunctions<int8_t>(prefix);
registerArrayAppendFunctions<int16_t>(prefix);
registerArrayAppendFunctions<int32_t>(prefix);
registerArrayAppendFunctions<int64_t>(prefix);
registerArrayAppendFunctions<int128_t>(prefix);
registerArrayAppendFunctions<float>(prefix);
registerArrayAppendFunctions<double>(prefix);
registerArrayAppendFunctions<bool>(prefix);
registerArrayAppendFunctions<Varchar>(prefix);
registerArrayAppendFunctions<Timestamp>(prefix);
registerArrayAppendFunctions<Date>(prefix);
}

void registerArrayFunctions(const std::string& prefix) {
registerArrayAppendFunctions(prefix);
registerArrayJoinFunctions(prefix);
registerArrayMinMaxFunctions(prefix);
registerArrayRemoveFunctions(prefix);
Expand Down
Loading