replace - from output column names with _
manu-sj committed Jun 17, 2024
1 parent 64f34cd commit 9891900
Showing 4 changed files with 14 additions and 14 deletions.
2 changes: 1 addition & 1 deletion python/hsfs/hopsworks_udf.py
@@ -366,7 +366,7 @@ def _get_output_column_names(self) -> str:
`List[str]`: List of feature names for the transformed columns
"""
_BASE_COLUMN_NAME = (
f'{self.function_name}_{"-".join(self.transformation_features)}_'
f'{self.function_name}_{"_".join(self.transformation_features)}_'
)
if len(self.return_types) > 1:
return [f"{_BASE_COLUMN_NAME}{i}" for i in range(len(self.return_types))]
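For context, the naming rule this commit changes can be sketched in isolation (a minimal sketch with illustrative values, mirroring the logic of _get_output_column_names above and the test expectations below; it is not the Hopsworks module itself):

# Standalone sketch of the output-column naming rule after this commit.
function_name = "plus_two"
transformation_features = ["col1", "col2"]  # names of the input features
return_types = [int, int]  # one output column per declared return type

# Input feature names are now joined with "_" instead of "-".
base_column_name = f'{function_name}_{"_".join(transformation_features)}_'

# Multi-output UDFs get a numeric suffix per output; a single-output UDF keeps
# the base name, trailing underscore included (see test_hopswork_udf.py below).
if len(return_types) > 1:
    output_columns = [f"{base_column_name}{i}" for i in range(len(return_types))]
else:
    output_columns = [base_column_name]

print(output_columns)  # ['plus_two_col1_col2_0', 'plus_two_col1_col2_1']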
12 changes: 6 additions & 6 deletions python/tests/engine/test_python.py
@@ -3354,12 +3354,12 @@ def plus_two(col1, col2):
)

# Assert
assert all(result.columns == ["plus_two_col1-col2_0", "plus_two_col1-col2_1"])
assert all(result.columns == ["plus_two_col1_col2_0", "plus_two_col1_col2_1"])
assert len(result) == 2
assert result["plus_two_col1-col2_0"][0] == 2
assert result["plus_two_col1-col2_0"][1] == 3
assert result["plus_two_col1-col2_1"][0] == 12
assert result["plus_two_col1-col2_1"][1] == 13
assert result["plus_two_col1_col2_0"][0] == 2
assert result["plus_two_col1_col2_0"][1] == 3
assert result["plus_two_col1_col2_1"][0] == 12
assert result["plus_two_col1_col2_1"][1] == 13

def test_apply_transformation_function_polars(self, mocker):
# Arrange
@@ -3854,7 +3854,7 @@ def test_materialization_kafka_first_job_execution(self, mocker):
args="defaults tests_offsets",
await_termination=False,
)

def test_materialization_kafka_skip_offsets(self, mocker):
# Arrange
mocker.patch("hsfs.engine.python.Engine._get_kafka_config", return_value={})
4 changes: 2 additions & 2 deletions python/tests/engine/test_spark.py
@@ -5,7 +5,7 @@
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
@@ -937,7 +937,7 @@ def test_save_stream_dataframe(self, mocker, backend_fixtures):
mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[
0
][1]
== f"/Projects/test_project_name/Resources/{self._get_spark_query_name(project_id, fg)}-checkpoint"
== f"/Projects/test_project_name/Resources/{self._get_spark_query_name(project_id, fg)}_checkpoint"
)
assert (
mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.call_args[
@@ -1053,7 +1053,7 @@ def test_save_stream_dataframe_query_name(self, mocker, backend_fixtures):
mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[
0
][1]
== "/Projects/test_project_name/Resources/test_query_name-checkpoint"
== "/Projects/test_project_name/Resources/test_query_name_checkpoint"
)
assert (
mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.call_args[
@@ -1293,7 +1293,7 @@ def test_save_stream_dataframe_await_termination(self, mocker, backend_fixtures)
mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[
0
][1]
== f"/Projects/test_project_name/Resources/{self._get_spark_query_name(project_id, fg)}-checkpoint"
== f"/Projects/test_project_name/Resources/{self._get_spark_query_name(project_id, fg)}_checkpoint"
)
assert (
mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.call_args[
@@ -2456,7 +2456,7 @@ def test_time_series_split_date(self, mocker):
d = {
"col_0": [1, 2],
"col_1": ["test_1", "test_2"],
"event_time": ["2017-03-04", "2017-03-05"],
"event_time": ["2017_03_04", "2017_03_05"],
}
df = pd.DataFrame(data=d)

@@ -2516,7 +2516,7 @@ def test_time_series_split_timestamp(self, mocker):
d = {
"col_0": [1, 2],
"col_1": ["test_1", "test_2"],
"event_time": ["2017-03-04", "2017-03-05"],
"event_time": ["2017_03_04", "2017_03_05"],
}
df = pd.DataFrame(data=d)

@@ -3809,7 +3809,7 @@ def __init__(self, label, index):
"double": ["1"],
"timestamp": [1641340800000],
"boolean": ["False"],
"date": ["2022-01-27"],
"date": ["2022_01_27"],
"binary": ["1"],
"array<string>": [["123"]],
"struc": [LabelIndex("0", "1")],
@@ -4212,11 +4212,11 @@ def test_setup_s3_hadoop_conf(self, mocker):
"fs.s3a.secret.key", s3_connector.secret_key
)
mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.assert_any_call(
"fs.s3a.server-side-encryption-algorithm",
"fs.s3a.server_side_encryption_algorithm",
s3_connector.server_encryption_algorithm,
)
mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.assert_any_call(
"fs.s3a.server-side-encryption-key", s3_connector.server_encryption_key
"fs.s3a.server_side_encryption_key", s3_connector.server_encryption_key
)
mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.assert_any_call(
"fs.s3a.aws.credentials.provider",
@@ -4487,8 +4487,8 @@ def test(col1, col2):
expected_df = pd.DataFrame(
data={
"col_1": ["test_1", "test_2"],
"test_col_0-col_2_0": [2, 3],
"test_col_0-col_2_1": [12, 13],
"test_col_0_col_2_0": [2, 3],
"test_col_0_col_2_1": [12, 13],
}
)  # TODO: why doesn't it return int?

@@ -4514,7 +4514,7 @@ def test_setup_gcp_hadoop_conf(self, mocker):

content = (
'{"type": "service_account", "project_id": "test", "private_key_id": "123456", '
'"private_key": "-----BEGIN PRIVATE KEY-----test-----END PRIVATE KEY-----", '
'"private_key": "_____BEGIN PRIVATE KEY_____test_____END PRIVATE KEY_____", '
'"client_email": "test@project.iam.gserviceaccount.com"}'
)
credentialsFile = "keyFile.json"
@@ -4563,7 +4563,7 @@ def test_setup_gcp_hadoop_conf(self, mocker):
)
mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.assert_any_call(
"fs.gs.auth.service.account.private.key",
"-----BEGIN PRIVATE KEY-----test-----END PRIVATE KEY-----",
"_____BEGIN PRIVATE KEY_____test_____END PRIVATE KEY_____",
)
mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.unset.assert_any_call(
"fs.gs.encryption.algorithm"
@@ -4586,7 +4586,7 @@ def test_setup_gcp_hadoop_conf_algorithm(self, mocker):

content = (
'{"type": "service_account", "project_id": "test", "private_key_id": "123456", '
'"private_key": "-----BEGIN PRIVATE KEY-----test-----END PRIVATE KEY-----", '
'"private_key": "_____BEGIN PRIVATE KEY_____test_____END PRIVATE KEY_____", '
'"client_email": "test@project.iam.gserviceaccount.com"}'
)
credentialsFile = "keyFile.json"
@@ -4650,7 +4650,7 @@ def test_setup_gcp_hadoop_conf_algorithm(self, mocker):
)
mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.assert_any_call(
"fs.gs.auth.service.account.private.key",
"-----BEGIN PRIVATE KEY-----test-----END PRIVATE KEY-----",
"_____BEGIN PRIVATE KEY_____test_____END PRIVATE KEY_____",
)

def test_get_unique_values(self):
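A likely practical effect of the rename, illustrated below (an assumption about the motivation, not stated in the commit; the snippet assumes a local pyspark installation and is not part of the diff): underscore-only column names are valid unquoted identifiers in Spark SQL, while the old hyphenated names had to be backtick-quoted because an unquoted hyphen parses as subtraction.

# Illustration only: querying the new-style column names in Spark SQL.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([(2, 12)], ["plus_two_col1_col2_0", "plus_two_col1_col2_1"])
df.createOrReplaceTempView("transformed")

# Works as-is with underscore-only names:
spark.sql("SELECT plus_two_col1_col2_0 FROM transformed").show()

# The old name would have needed backticks:
#   SELECT `plus_two_col1-col2_0` FROM transformed
# since plus_two_col1-col2_0 unquoted parses as "plus_two_col1 minus col2_0".
spark.stop()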
10 changes: 5 additions & 5 deletions python/tests/test_hopswork_udf.py
@@ -337,7 +337,7 @@ def test_generate_output_column_names_multiple_argument_one_output_type(self):
def test_func(col1, col2, col3):
return col1 + 1

assert test_func._get_output_column_names() == ["test_func_col1-col2-col3_"]
assert test_func._get_output_column_names() == ["test_func_col1_col2_col3_"]

def test_generate_output_column_names_single_argument_multiple_output_type(self):
@udf([int, float, int])
@@ -360,9 +360,9 @@ def test_func(col1, col2, col3):
)

assert test_func._get_output_column_names() == [
"test_func_col1-col2-col3_0",
"test_func_col1-col2-col3_1",
"test_func_col1-col2-col3_2",
"test_func_col1_col2_col3_0",
"test_func_col1_col2_col3_1",
"test_func_col1_col2_col3_2",
]

def test_create_pandas_udf_return_schema_from_list_one_output_type(self):
@@ -422,7 +422,7 @@ def test_func(col1, col2):
test_dataframe["column1"], test_dataframe["column2"]
)

assert all(result.columns == ["test_func_col1-col2_0", "test_func_col1-col2_1"])
assert all(result.columns == ["test_func_col1_col2_0", "test_func_col1_col2_1"])
assert result.values.tolist() == [[2, 12], [3, 22], [4, 32], [5, 42]]

def test_HopsworkUDf_call_one_argument(self):
