Skip to content

Commit

Permalink
update datatype serde ut
Browse files Browse the repository at this point in the history
  • Loading branch information
amorynan committed Jan 8, 2025
1 parent d637e16 commit 38f0d27
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 5 deletions.
4 changes: 3 additions & 1 deletion be/src/vec/data_types/data_type_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,10 @@ bool DataTypeArray::equals(const IDataType& rhs) const {
nested->equals(*static_cast<const DataTypeArray&>(rhs).nested);
}

// Strip the Nullable wrapper from the nested type before the cast below; otherwise a nullable nested array never matches and the result is always 1.
size_t DataTypeArray::get_number_of_dimensions() const {
const DataTypeArray* nested_array = typeid_cast<const DataTypeArray*>(nested.get());
const DataTypeArray* nested_array =
typeid_cast<const DataTypeArray*>(remove_nullable(nested).get());
if (!nested_array) return 1;
return 1 +
nested_array
Expand Down
48 changes: 48 additions & 0 deletions be/test/vec/data_types/common_data_type_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ class CommonDataTypeTest : public ::testing::Test {
bool is_value_represented_by_number = false;
PColumnMeta* pColumnMeta = nullptr;
DataTypeSerDeSPtr serde = nullptr;
bool is_value_unambiguously_represented_in_contiguous_memory_region = false;
Field default_field;
};
void SetUp() override {}
Expand All @@ -133,6 +134,8 @@ class CommonDataTypeTest : public ::testing::Test {
ASSERT_EQ(data_type->text_can_contain_only_valid_utf8(),
meta_info.text_can_contain_only_valid_utf8);
ASSERT_EQ(data_type->have_maximum_size_of_value(), meta_info.have_maximum_size_of_value);
ASSERT_EQ(data_type->is_value_unambiguously_represented_in_contiguous_memory_region(),
meta_info.is_value_unambiguously_represented_in_contiguous_memory_region);
if (is_decimal(data_type) || data_type->is_nullable() || is_struct(data_type) ||
is_nothing(data_type) || is_number(data_type) || is_columned_as_number(data_type) ||
is_ip(data_type)) {
Expand Down Expand Up @@ -217,7 +220,9 @@ class CommonDataTypeTest : public ::testing::Test {
VectorBufferWriter buffer_writer(*ser_col.get());
for (int i = 0; i < mutableColumn->size(); ++i) {
data_type->to_string(*mutableColumn, i, buffer_writer);
std::string res = data_type->to_string(*mutableColumn, i);
buffer_writer.commit();
EXPECT_EQ(res, ser_col->get_data_at(i).to_string());
}
// check ser_col to assert_column and check same with mutableColumn
auto assert_column_1 = data_type->create_column();
Expand All @@ -230,6 +235,49 @@ class CommonDataTypeTest : public ::testing::Test {
}
}

// Round-trips `columns` through Block::serialize / Block::deserialize (the PBlock
// protobuf form, which is the path used when data is shuffled between BEs) and
// asserts that the restored block holds the same values as the original.
//
// columns:    columns to round-trip; shorter columns are resized in place so that
//             every column ends up with the same number of rows.
// data_types: one data type per entry of `columns`, in the same order.
//
// Failures are reported through gtest ASSERT_* macros, so the helper returns early
// on the first mismatch.
void serialize_deserialize_assert(MutableColumns& columns, DataTypes data_types) {
    // Guard the indexing below: there must be at least one column and exactly one
    // data type per column.
    ASSERT_FALSE(columns.empty());
    ASSERT_EQ(columns.size(), data_types.size());

    // A block needs every column to have the same row count, so grow the shorter
    // columns up to the longest one.
    size_t max_row = 0;
    for (const auto& column : columns) {
        max_row = std::max(max_row, column->size());
    }
    for (auto& column : columns) {
        if (column->size() < max_row) {
            column->resize(max_row);
        }
    }

    // Wrap the columns into a block (column names are irrelevant here).
    Block block;
    for (size_t i = 0; i < columns.size(); ++i) {
        block.insert({columns[i]->get_ptr(), data_types[i], ""});
    }

    // Serialize. Block::serialize takes, in order: int be_exec_version, PBlock* pblock,
    // size_t* uncompressed_bytes, size_t* compressed_bytes,
    // segment_v2::CompressionTypePB compression_type.
    const int be_exec_version = 2;
    PBlock pblock;
    size_t uncompressed_bytes = 0;
    size_t compressed_bytes = 0;
    const segment_v2::CompressionTypePB compression_type = segment_v2::CompressionTypePB::ZSTD;
    Status st = block.serialize(be_exec_version, &pblock, &uncompressed_bytes,
                                &compressed_bytes, compression_type);
    ASSERT_TRUE(st.ok()) << st.to_string();

    // Deserialize into a fresh block.
    Block restored;
    st = restored.deserialize(pblock);
    ASSERT_TRUE(st.ok()) << st.to_string();

    // Compare column by column. The outer index is a column position, so it must be
    // bounded by the number of columns, not the number of rows.
    ASSERT_EQ(block.columns(), restored.columns());
    for (size_t i = 0; i < block.columns(); ++i) {
        const auto& expected = block.get_by_position(i);
        const auto& actual = restored.get_by_position(i);
        ASSERT_EQ(expected.column->size(), actual.column->size());
        for (size_t j = 0; j < expected.column->size(); ++j) {
            ASSERT_EQ((*expected.column)[j], (*actual.column)[j]);
        }
    }
}

// should all datatype is compare?
void assert_compare_behavior(DataTypePtr l_dt, DataTypePtr& r_dt) {
ASSERT_TRUE(l_dt->is_comparable());
Expand Down
25 changes: 21 additions & 4 deletions be/test/vec/data_types/data_type_array_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -320,15 +320,17 @@ TEST_F(DataTypeArrayTest, MetaInfoTest) {
for (int i = 0; i < array_types.size(); i++) {
auto& type = array_types[i];
auto& desc = array_descs[i];
auto array_type = assert_cast<const DataTypeArray*>(remove_nullable(type).get());
auto nested_type =
assert_cast<const DataTypeArray*>(remove_nullable(type).get())->get_nested_type();

TypeDescriptor arr_type_descriptor = {PrimitiveType::TYPE_ARRAY};
arr_type_descriptor.add_sub_type(desc[0].type_desc.children[0]);
auto col_meta = std::make_shared<PColumnMeta>();
array_type->to_pb_column_meta(col_meta.get());
Array a;
a.push_back(nested_type->get_default());
col_meta->set_type(PGenericType_TypeId_LIST);

DataTypeMetaInfo arr_meta_info_to_assert = {
.type_id = TypeIndex::Array,
.type_as_type_descriptor = &arr_type_descriptor,
Expand All @@ -344,6 +346,9 @@ TEST_F(DataTypeArrayTest, MetaInfoTest) {
.is_null_literal = false,
.is_value_represented_by_number = false,
.pColumnMeta = col_meta.get(),
.is_value_unambiguously_represented_in_contiguous_memory_region =
nested_type
->is_value_unambiguously_represented_in_contiguous_memory_region(),
.default_field = a,
};
DataTypePtr arr = remove_nullable(type);
Expand Down Expand Up @@ -482,6 +487,11 @@ TEST_F(DataTypeArrayTest, SerdeMysqlTest) {
CommonDataTypeSerdeTest::assert_mysql_format(array_columns, serdes);
}

TEST_F(DataTypeArrayTest, SerializeDeserializeTest) {
// Round-trip the fixture's array columns through Block serialize/deserialize (PBlock)
// and assert that the deserialized block matches the original.
CommonDataTypeTest::serialize_deserialize_assert(array_columns, array_types);
}

TEST_F(DataTypeArrayTest, SerdeArrowTest) {
// todo. fix decimal256 serde
MutableColumns array_cols;
Expand Down Expand Up @@ -524,13 +534,20 @@ TEST_F(DataTypeArrayTest, SerdeArrowTest) {

//================== datatype for array ut test ==================
TEST_F(DataTypeArrayTest, GetNumberOfDimensionsTest) {
for (int i = 0; i < array_types.size() - 5; i++) {
// for array-scalar
for (int i = 0; i < 18; i++) {
auto& type = array_types[i];
auto array_type = assert_cast<const DataTypeArray*>(remove_nullable(type).get());
// The dimension count only follows arrays nested directly inside arrays; an array whose element is a map or struct still has dimension 1.
EXPECT_EQ(array_type->get_number_of_dimensions(), 1) << "for type: " << type->get_name();
}
// for array-array
for (int i = 18; i < 36; i++) {
auto& type = array_types[i];
auto desc = array_descs[i];
auto array_type = assert_cast<const DataTypeArray*>(remove_nullable(type).get());
// array dimension is only for array to nested array , if array nested map or struct, the dimension also be is 1
EXPECT_EQ(array_type->get_number_of_dimensions(), desc.size())
<< "for type: " << type->get_name() << " desc size: " << desc.size();
EXPECT_EQ(array_type->get_number_of_dimensions(), 2) << "for type: " << type->get_name();
}
for (int i = 36; i < 41; i++) {
auto& type = array_types[i];
Expand Down

0 comments on commit 38f0d27

Please sign in to comment.