diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index 72237defd..9c6deef45 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -1,8 +1,12 @@ # Upcoming Release: -* Change reference to `kedro.pipeline.Pipeline` object throughout test suite with `kedro.modular_pipeline.pipeline` factory. +# Release 1.0.2: + +## Bug fixes and other changes +* Change reference to `kedro.pipeline.Pipeline` object throughout test suite with `kedro.modular_pipeline.pipeline` factory. * Relaxed PyArrow range in line with Pandas +* Fixed outdated links to the dill package documentation # Release 1.0.1: diff --git a/kedro-datasets/kedro_datasets/__init__.py b/kedro-datasets/kedro_datasets/__init__.py index d34d03c10..d8bcc2d13 100644 --- a/kedro-datasets/kedro_datasets/__init__.py +++ b/kedro-datasets/kedro_datasets/__init__.py @@ -1,3 +1,3 @@ """``kedro_datasets`` is where you can find all of Kedro's data connectors.""" -__version__ = "1.0.1" +__version__ = "1.0.2" diff --git a/kedro-datasets/kedro_datasets/api/api_dataset.py b/kedro-datasets/kedro_datasets/api/api_dataset.py index 93e39fb51..4f0ffb4cc 100644 --- a/kedro-datasets/kedro_datasets/api/api_dataset.py +++ b/kedro-datasets/kedro_datasets/api/api_dataset.py @@ -111,7 +111,7 @@ def __init__( } def _describe(self) -> Dict[str, Any]: - return dict(**self._request_args) + return {**self._request_args} def _execute_request(self) -> requests.Response: try: diff --git a/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py b/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py index ae34b30e8..7c45743da 100644 --- a/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py +++ b/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py @@ -101,12 +101,12 @@ def __init__( self._fs_open_args_save = _fs_open_args_save def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - load_args=self._load_args, - save_args=self._save_args, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "load_args": self._load_args, + "save_args": self._save_args, + } def _load(self) -> List: load_path = get_filepath_str(self._filepath, self._protocol) diff --git a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py index 9161fa4e6..f02144892 100644 --- a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py +++ b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py @@ -134,11 +134,11 @@ def fs_args(self) -> Dict[str, Any]: return fs_args def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - load_args=self._load_args, - save_args=self._save_args, - ) + return { + "filepath": self._filepath, + "load_args": self._load_args, + "save_args": self._save_args, + } def _load(self) -> dd.DataFrame: return dd.read_parquet( diff --git a/kedro-datasets/kedro_datasets/email/message_dataset.py b/kedro-datasets/kedro_datasets/email/message_dataset.py index fdc684504..0b8623f63 100644 --- a/kedro-datasets/kedro_datasets/email/message_dataset.py +++ b/kedro-datasets/kedro_datasets/email/message_dataset.py @@ -141,15 +141,15 @@ def __init__( self._fs_open_args_save = _fs_open_args_save def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - load_args=self._load_args, - parser_args=self._parser_args, - save_args=self._save_args, - generator_args=self._generator_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "load_args": self._load_args, + "parser_args": self._parser_args, + "save_args": self._save_args, + "generator_args": self._generator_args, + "version": self._version, + } def _load(self) -> Message: load_path = get_filepath_str(self._get_load_path(), self._protocol) diff --git a/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py b/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py index d0ca02722..ba9237909 100644 --- a/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py +++ b/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py @@ -135,13 +135,13 @@ def _exists(self) -> bool: return self._fs.exists(load_path) def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - load_args=self._load_args, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "load_args": self._load_args, + "save_args": self._save_args, + "version": self._version, + } def _release(self) -> None: self.invalidate_cache() diff --git a/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py b/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py index 7be8790e2..7f61909b9 100644 --- a/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py +++ b/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py @@ -98,12 +98,12 @@ def __init__( self._save_args.update(save_args) def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "save_args": self._save_args, + "version": self._version, + } def _load(self) -> NoReturn: raise DataSetError(f"Loading not supported for '{self.__class__.__name__}'") diff --git a/kedro-datasets/kedro_datasets/json/json_dataset.py b/kedro-datasets/kedro_datasets/json/json_dataset.py index 73268b223..ad86c9a17 100644 --- a/kedro-datasets/kedro_datasets/json/json_dataset.py +++ b/kedro-datasets/kedro_datasets/json/json_dataset.py @@ -115,12 +115,12 @@ def __init__( self._fs_open_args_save = _fs_open_args_save def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "save_args": self._save_args, + "version": self._version, + } def _load(self) -> Any: load_path = get_filepath_str(self._get_load_path(), self._protocol) diff --git a/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py b/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py index 5757b08ab..3fc396cb1 100644 --- a/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py +++ b/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py @@ -75,7 +75,7 @@ class MatplotlibWriter( >>> import matplotlib.pyplot as plt >>> from kedro_datasets.matplotlib import MatplotlibWriter >>> - >>> plots_dict = dict() + >>> plots_dict = {} >>> for colour in ["blue", "green", "red"]: >>> plots_dict[f"{colour}.png"] = plt.figure() >>> plt.plot([1, 2, 3], color=colour) @@ -177,12 +177,12 @@ def __init__( self._overwrite = overwrite def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "save_args": self._save_args, + "version": self._version, + } def _load(self) -> NoReturn: raise DataSetError(f"Loading not supported for '{self.__class__.__name__}'") diff --git a/kedro-datasets/kedro_datasets/networkx/gml_dataset.py b/kedro-datasets/kedro_datasets/networkx/gml_dataset.py index 1755674c9..bc8d4f86f 100644 --- a/kedro-datasets/kedro_datasets/networkx/gml_dataset.py +++ b/kedro-datasets/kedro_datasets/networkx/gml_dataset.py @@ -122,13 +122,13 @@ def _exists(self) -> bool: return self._fs.exists(load_path) def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - load_args=self._load_args, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "load_args": self._load_args, + "save_args": self._save_args, + "version": self._version, + } def _release(self) -> None: super()._release() diff --git a/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py b/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py index d48c53b5f..2105fb67f 100644 --- a/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py +++ b/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py @@ -120,13 +120,13 @@ def _exists(self) -> bool: return self._fs.exists(load_path) def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - load_args=self._load_args, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "load_args": self._load_args, + "save_args": self._save_args, + "version": self._version, + } def _release(self) -> None: super()._release() diff --git a/kedro-datasets/kedro_datasets/networkx/json_dataset.py b/kedro-datasets/kedro_datasets/networkx/json_dataset.py index b7c47c823..8cc436721 100644 --- a/kedro-datasets/kedro_datasets/networkx/json_dataset.py +++ b/kedro-datasets/kedro_datasets/networkx/json_dataset.py @@ -127,13 +127,13 @@ def _exists(self) -> bool: return self._fs.exists(load_path) def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - load_args=self._load_args, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "load_args": self._load_args, + "save_args": self._save_args, + "version": self._version, + } def _release(self) -> None: super()._release() diff --git a/kedro-datasets/kedro_datasets/pandas/csv_dataset.py b/kedro-datasets/kedro_datasets/pandas/csv_dataset.py index 2a6366bd0..7b20813f3 100644 --- a/kedro-datasets/kedro_datasets/pandas/csv_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/csv_dataset.py @@ -139,13 +139,13 @@ def __init__( self._load_args.pop("storage_options", None) def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - load_args=self._load_args, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "load_args": self._load_args, + "save_args": self._save_args, + "version": self._version, + } def _load(self) -> pd.DataFrame: load_path = str(self._get_load_path()) diff --git a/kedro-datasets/kedro_datasets/pandas/excel_dataset.py b/kedro-datasets/kedro_datasets/pandas/excel_dataset.py index aec96c6ed..4a981bc11 100644 --- a/kedro-datasets/kedro_datasets/pandas/excel_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/excel_dataset.py @@ -199,14 +199,14 @@ def __init__( self._load_args.pop("storage_options", None) def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - load_args=self._load_args, - save_args=self._save_args, - writer_args=self._writer_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "load_args": self._load_args, + "save_args": self._save_args, + "writer_args": self._writer_args, + "version": self._version, + } def _load(self) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]: load_path = str(self._get_load_path()) diff --git a/kedro-datasets/kedro_datasets/pandas/feather_dataset.py b/kedro-datasets/kedro_datasets/pandas/feather_dataset.py index 9dc56b2b5..1116d4168 100644 --- a/kedro-datasets/kedro_datasets/pandas/feather_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/feather_dataset.py @@ -139,12 +139,12 @@ def __init__( self._load_args.pop("storage_options", None) def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - load_args=self._load_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "load_args": self._load_args, + "version": self._version, + } def _load(self) -> pd.DataFrame: load_path = str(self._get_load_path()) diff --git a/kedro-datasets/kedro_datasets/pandas/gbq_dataset.py b/kedro-datasets/kedro_datasets/pandas/gbq_dataset.py index 02dc31002..c0122a6c0 100644 --- a/kedro-datasets/kedro_datasets/pandas/gbq_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/gbq_dataset.py @@ -126,12 +126,12 @@ def __init__( ) def _describe(self) -> Dict[str, Any]: - return dict( - dataset=self._dataset, - table_name=self._table_name, - load_args=self._load_args, - save_args=self._save_args, - ) + return { + "dataset": self._dataset, + "table_name": self._table_name, + "load_args": self._load_args, + "save_args": self._save_args, + } def _load(self) -> pd.DataFrame: sql = f"select * from {self._dataset}.{self._table_name}" # nosec diff --git a/kedro-datasets/kedro_datasets/pandas/generic_dataset.py b/kedro-datasets/kedro_datasets/pandas/generic_dataset.py index 08717fbb3..86e347d70 100644 --- a/kedro-datasets/kedro_datasets/pandas/generic_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/generic_dataset.py @@ -223,14 +223,14 @@ def _exists(self) -> bool: return self._fs.exists(load_path) def _describe(self) -> Dict[str, Any]: - return dict( - file_format=self._file_format, - filepath=self._filepath, - protocol=self._protocol, - load_args=self._load_args, - save_args=self._save_args, - version=self._version, - ) + return { + "file_format": self._file_format, + "filepath": self._filepath, + "protocol": self._protocol, + "load_args": self._load_args, + "save_args": self._save_args, + "version": self._version, + } def _release(self) -> None: super()._release() diff --git a/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py b/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py index bf43a883e..f11fe320f 100644 --- a/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py @@ -135,14 +135,14 @@ def __init__( self._fs_open_args_save = _fs_open_args_save def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - key=self._key, - protocol=self._protocol, - load_args=self._load_args, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "key": self._key, + "protocol": self._protocol, + "load_args": self._load_args, + "save_args": self._save_args, + "version": self._version, + } def _load(self) -> pd.DataFrame: load_path = get_filepath_str(self._get_load_path(), self._protocol) diff --git a/kedro-datasets/kedro_datasets/pandas/json_dataset.py b/kedro-datasets/kedro_datasets/pandas/json_dataset.py index cea0b985d..d29ef57bd 100644 --- a/kedro-datasets/kedro_datasets/pandas/json_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/json_dataset.py @@ -133,13 +133,13 @@ def __init__( self._load_args.pop("storage_options", None) def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - load_args=self._load_args, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "load_args": self._load_args, + "save_args": self._save_args, + "version": self._version, + } def _load(self) -> pd.DataFrame: load_path = str(self._get_load_path()) diff --git a/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py b/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py index d0acdc5d1..acb478bd9 100644 --- a/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py @@ -148,13 +148,13 @@ def __init__( self._load_args.pop("storage_options", None) def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - load_args=self._load_args, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "load_args": self._load_args, + "save_args": self._save_args, + "version": self._version, + } def _load(self) -> pd.DataFrame: load_path = str(self._get_load_path()) diff --git a/kedro-datasets/kedro_datasets/pandas/sql_dataset.py b/kedro-datasets/kedro_datasets/pandas/sql_dataset.py index 400195719..1400e4981 100644 --- a/kedro-datasets/kedro_datasets/pandas/sql_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/sql_dataset.py @@ -237,11 +237,11 @@ def _describe(self) -> Dict[str, Any]: save_args = copy.deepcopy(self._save_args) del load_args["table_name"] del save_args["name"] - return dict( - table_name=self._load_args["table_name"], - load_args=load_args, - save_args=save_args, - ) + return { + "table_name": self._load_args["table_name"], + "load_args": load_args, + "save_args": save_args, + } def _load(self) -> pd.DataFrame: engine = self.engines[self._connection_str] # type:ignore @@ -434,12 +434,12 @@ def create_connection(cls, connection_str: str) -> None: def _describe(self) -> Dict[str, Any]: load_args = copy.deepcopy(self._load_args) - return dict( - sql=str(load_args.pop("sql", None)), - filepath=str(self._filepath), - load_args=str(load_args), - execution_options=str(self._execution_options), - ) + return { + "sql": str(load_args.pop("sql", None)), + "filepath": str(self._filepath), + "load_args": str(load_args), + "execution_options": str(self._execution_options), + } def _load(self) -> pd.DataFrame: load_args = copy.deepcopy(self._load_args) diff --git a/kedro-datasets/kedro_datasets/pandas/xml_dataset.py b/kedro-datasets/kedro_datasets/pandas/xml_dataset.py index 5760268a7..ca8fc0dd2 100644 --- a/kedro-datasets/kedro_datasets/pandas/xml_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/xml_dataset.py @@ -117,13 +117,13 @@ def __init__( self._load_args.pop("storage_options", None) def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - load_args=self._load_args, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "load_args": self._load_args, + "save_args": self._save_args, + "version": self._version, + } def _load(self) -> pd.DataFrame: load_path = str(self._get_load_path()) diff --git a/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py b/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py index 611865078..436fba29a 100644 --- a/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py +++ b/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py @@ -192,14 +192,14 @@ def __init__( self._fs_open_args_save = _fs_open_args_save def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - backend=self._backend, - protocol=self._protocol, - load_args=self._load_args, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "backend": self._backend, + "protocol": self._protocol, + "load_args": self._load_args, + "save_args": self._save_args, + "version": self._version, + } def _load(self) -> Any: load_path = get_filepath_str(self._get_load_path(), self._protocol) diff --git a/kedro-datasets/kedro_datasets/pillow/image_dataset.py b/kedro-datasets/kedro_datasets/pillow/image_dataset.py index 8c2fdc983..ca939b722 100644 --- a/kedro-datasets/kedro_datasets/pillow/image_dataset.py +++ b/kedro-datasets/kedro_datasets/pillow/image_dataset.py @@ -100,12 +100,12 @@ def __init__( self._fs_open_args_save = _fs_open_args_save def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "save_args": self._save_args, + "version": self._version, + } def _load(self) -> Image.Image: load_path = get_filepath_str(self._get_load_path(), self._protocol) diff --git a/kedro-datasets/kedro_datasets/plotly/json_dataset.py b/kedro-datasets/kedro_datasets/plotly/json_dataset.py index 7eaae8da9..f819dd338 100644 --- a/kedro-datasets/kedro_datasets/plotly/json_dataset.py +++ b/kedro-datasets/kedro_datasets/plotly/json_dataset.py @@ -125,13 +125,13 @@ def __init__( self._fs_open_args_save = _fs_open_args_save def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - load_args=self._load_args, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "load_args": self._load_args, + "save_args": self._save_args, + "version": self._version, + } def _load(self) -> Union[go.Figure, go.FigureWidget]: load_path = get_filepath_str(self._get_load_path(), self._protocol) diff --git a/kedro-datasets/kedro_datasets/redis/redis_dataset.py b/kedro-datasets/kedro_datasets/redis/redis_dataset.py index f012f0cd7..6d2f80df9 100644 --- a/kedro-datasets/kedro_datasets/redis/redis_dataset.py +++ b/kedro-datasets/kedro_datasets/redis/redis_dataset.py @@ -152,7 +152,7 @@ def __init__( ) def _describe(self) -> Dict[str, Any]: - return dict(key=self._key, **self._redis_from_url_args) + return {"key": self._key, **self._redis_from_url_args} # `redis_db` mypy does not work since it is optional and optional is not # accepted by pickle.loads. diff --git a/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py b/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py index db45bc12c..34ee6f6a5 100644 --- a/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py +++ b/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py @@ -100,4 +100,4 @@ def _exists(self) -> bool: return True def _describe(self): - return dict(filepath=str(self._filepath), fs_prefix=self._fs_prefix) + return {"filepath": str(self._filepath), "fs_prefix": self._fs_prefix} diff --git a/kedro-datasets/kedro_datasets/spark/spark_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_dataset.py index 2250ae337..ca923c72e 100644 --- a/kedro-datasets/kedro_datasets/spark/spark_dataset.py +++ b/kedro-datasets/kedro_datasets/spark/spark_dataset.py @@ -364,13 +364,13 @@ def _load_schema_from_file(schema: Dict[str, Any]) -> StructType: ) from exc def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._fs_prefix + str(self._filepath), - file_format=self._file_format, - load_args=self._load_args, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._fs_prefix + str(self._filepath), + "file_format": self._file_format, + "load_args": self._load_args, + "save_args": self._save_args, + "version": self._version, + } @staticmethod def _get_spark(): diff --git a/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py index 613b6af5f..08b0666ea 100644 --- a/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py +++ b/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py @@ -121,14 +121,14 @@ def __init__( self._eager_checkpoint = self._save_args.pop("eager_checkpoint", None) or True def _describe(self) -> Dict[str, Any]: - return dict( - database=self._database, - table=self._table, - write_mode=self._write_mode, - table_pk=self._table_pk, - partition_by=self._save_args.get("partitionBy"), - format=self._format, - ) + return { + "database": self._database, + "table": self._table, + "write_mode": self._write_mode, + "table_pk": self._table_pk, + "partition_by": self._save_args.get("partitionBy"), + "format": self._format, + } @staticmethod def _get_spark() -> SparkSession: diff --git a/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py index 24bb3220a..aab501f26 100644 --- a/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py +++ b/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py @@ -156,9 +156,12 @@ def _describe(self) -> Dict[str, Any]: save_properties.pop("password", None) save_args = {**save_args, "properties": save_properties} - return dict( - url=self._url, table=self._table, load_args=load_args, save_args=save_args - ) + return { + "url": self._url, + "table": self._table, + "load_args": load_args, + "save_args": save_args, + } @staticmethod def _get_spark(): diff --git a/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py b/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py index 5c9e0699f..f909c1976 100644 --- a/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py +++ b/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py @@ -131,13 +131,13 @@ def __init__( self._fs_open_args_save = _fs_open_args_save def _describe(self): - return dict( - filepath=self._filepath, - protocol=self._protocol, - load_args=self._load_args, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "load_args": self._load_args, + "save_args": self._save_args, + "version": self._version, + } def _load(self) -> _DO: load_path = get_filepath_str(self._get_load_path(), self._protocol) diff --git a/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py b/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py index 63e53b7b4..544aadb06 100644 --- a/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py +++ b/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py @@ -169,13 +169,13 @@ def _exists(self) -> bool: return self._fs.exists(load_path) def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - load_args=self._load_args, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "load_args": self._load_args, + "save_args": self._save_args, + "version": self._version, + } def _release(self) -> None: super()._release() diff --git a/kedro-datasets/kedro_datasets/text/text_dataset.py b/kedro-datasets/kedro_datasets/text/text_dataset.py index 5ba2ee060..0bb559e29 100644 --- a/kedro-datasets/kedro_datasets/text/text_dataset.py +++ b/kedro-datasets/kedro_datasets/text/text_dataset.py @@ -100,11 +100,11 @@ def __init__( self._fs_open_args_save = _fs_open_args_save def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "version": self._version, + } def _load(self) -> str: load_path = get_filepath_str(self._get_load_path(), self._protocol) diff --git a/kedro-datasets/kedro_datasets/video/video_dataset.py b/kedro-datasets/kedro_datasets/video/video_dataset.py index 22bd51bc5..07f0e1c8f 100644 --- a/kedro-datasets/kedro_datasets/video/video_dataset.py +++ b/kedro-datasets/kedro_datasets/video/video_dataset.py @@ -350,7 +350,7 @@ def _write_to_filepath(self, video: AbstractVideo, filepath: str) -> None: writer.release() def _describe(self) -> Dict[str, Any]: - return dict(filepath=self._filepath, protocol=self._protocol) + return {"filepath": self._filepath, "protocol": self._protocol} def _exists(self) -> bool: return self._fs.exists(self._filepath) diff --git a/kedro-datasets/kedro_datasets/yaml/yaml_dataset.py b/kedro-datasets/kedro_datasets/yaml/yaml_dataset.py index 1ab2fa43b..f2a3c2696 100644 --- a/kedro-datasets/kedro_datasets/yaml/yaml_dataset.py +++ b/kedro-datasets/kedro_datasets/yaml/yaml_dataset.py @@ -113,12 +113,12 @@ def __init__( self._fs_open_args_save = _fs_open_args_save def _describe(self) -> Dict[str, Any]: - return dict( - filepath=self._filepath, - protocol=self._protocol, - save_args=self._save_args, - version=self._version, - ) + return { + "filepath": self._filepath, + "protocol": self._protocol, + "save_args": self._save_args, + "version": self._version, + } def _load(self) -> Dict: load_path = get_filepath_str(self._get_load_path(), self._protocol) diff --git a/kedro-datasets/tests/pandas/test_sql_dataset.py b/kedro-datasets/tests/pandas/test_sql_dataset.py index c882751b1..a1c6839d6 100644 --- a/kedro-datasets/tests/pandas/test_sql_dataset.py +++ b/kedro-datasets/tests/pandas/test_sql_dataset.py @@ -40,21 +40,21 @@ def sql_file(tmp_path: PosixPath): @pytest.fixture(params=[{}]) def table_data_set(request): - kwargs = dict(table_name=TABLE_NAME, credentials=dict(con=CONNECTION)) + kwargs = {"table_name": TABLE_NAME, "credentials": {"con": CONNECTION}} kwargs.update(request.param) return SQLTableDataSet(**kwargs) @pytest.fixture(params=[{}]) def query_data_set(request): - kwargs = dict(sql=SQL_QUERY, credentials=dict(con=CONNECTION)) + kwargs = {"sql": SQL_QUERY, "credentials": {"con": CONNECTION}} kwargs.update(request.param) return SQLQueryDataSet(**kwargs) @pytest.fixture(params=[{}]) def query_file_data_set(request, sql_file): - kwargs = dict(filepath=sql_file, credentials=dict(con=CONNECTION)) + kwargs = {"filepath": sql_file, "credentials": {"con": CONNECTION}} kwargs.update(request.param) return SQLQueryDataSet(**kwargs) @@ -74,7 +74,7 @@ def test_empty_table_name(self): """Check the error when instantiating with an empty table""" pattern = r"'table\_name' argument cannot be empty\." with pytest.raises(DataSetError, match=pattern): - SQLTableDataSet(table_name="", credentials=dict(con=CONNECTION)) + SQLTableDataSet(table_name="", credentials={"con": CONNECTION}) def test_empty_connection(self): """Check the error when instantiating with an empty @@ -84,7 +84,7 @@ def test_empty_connection(self): r"Please provide a SQLAlchemy connection string\." ) with pytest.raises(DataSetError, match=pattern): - SQLTableDataSet(table_name=TABLE_NAME, credentials=dict(con="")) + SQLTableDataSet(table_name=TABLE_NAME, credentials={"con": ""}) def test_driver_missing(self, mocker): """Check the error when the sql driver is missing""" @@ -93,7 +93,7 @@ def test_driver_missing(self, mocker): side_effect=ImportError("No module named 'mysqldb'"), ) with pytest.raises(DataSetError, match=ERROR_PREFIX + "mysqlclient"): - SQLTableDataSet(table_name=TABLE_NAME, credentials=dict(con=CONNECTION)) + SQLTableDataSet(table_name=TABLE_NAME, credentials={"con": CONNECTION}) def test_unknown_sql(self): """Check the error when unknown sql dialect is provided; @@ -102,7 +102,7 @@ def test_unknown_sql(self): """ pattern = r"The SQL dialect in your connection is not supported by SQLAlchemy" with pytest.raises(DataSetError, match=pattern): - SQLTableDataSet(table_name=TABLE_NAME, credentials=dict(con=FAKE_CONN_STR)) + SQLTableDataSet(table_name=TABLE_NAME, credentials={"con": FAKE_CONN_STR}) def test_unknown_module(self, mocker): """Test that if an unknown module/driver is encountered by SQLAlchemy @@ -113,7 +113,7 @@ def test_unknown_module(self, mocker): ) pattern = ERROR_PREFIX + r"No module named \'unknown\_module\'" with pytest.raises(DataSetError, match=pattern): - SQLTableDataSet(table_name=TABLE_NAME, credentials=dict(con=CONNECTION)) + SQLTableDataSet(table_name=TABLE_NAME, credentials={"con": CONNECTION}) def test_str_representation_table(self, table_data_set): """Test the data set instance string representation""" @@ -131,7 +131,7 @@ def test_table_exists(self, mocker, table_data_set): self._assert_sqlalchemy_called_once() @pytest.mark.parametrize( - "table_data_set", [{"load_args": dict(schema="ingested")}], indirect=True + "table_data_set", [{"load_args": {"schema": "ingested"}}], indirect=True ) def test_table_exists_schema(self, mocker, table_data_set): """Test `exists` method invocation with DB schema provided""" @@ -162,7 +162,7 @@ def test_save_default_index(self, mocker, table_data_set, dummy_dataframe): ) @pytest.mark.parametrize( - "table_data_set", [{"save_args": dict(index=True)}], indirect=True + "table_data_set", [{"save_args": {"index": True}}], indirect=True ) def test_save_overwrite_index(self, mocker, table_data_set, dummy_dataframe): """Test writing DataFrame index as a column""" @@ -173,7 +173,7 @@ def test_save_overwrite_index(self, mocker, table_data_set, dummy_dataframe): ) @pytest.mark.parametrize( - "table_data_set", [{"save_args": dict(name="TABLE_B")}], indirect=True + "table_data_set", [{"save_args": {"name": "TABLE_B"}}], indirect=True ) def test_save_ignore_table_name_override( self, mocker, table_data_set, dummy_dataframe @@ -192,7 +192,7 @@ def test_single_connection(self, dummy_dataframe, mocker): """Test to make sure multiple instances use the same connection object.""" mocker.patch("pandas.read_sql_table") dummy_to_sql = mocker.patch.object(dummy_dataframe, "to_sql") - kwargs = dict(table_name=TABLE_NAME, credentials=dict(con=CONNECTION)) + kwargs = {"table_name": TABLE_NAME, "credentials": {"con": CONNECTION}} first = SQLTableDataSet(**kwargs) unique_connection = first.engines[CONNECTION] @@ -216,11 +216,11 @@ def test_create_connection_only_once(self, mocker): (but different tables, for example) only create a connection once. """ mock_engine = mocker.patch("kedro_datasets.pandas.sql_dataset.create_engine") - first = SQLTableDataSet(table_name=TABLE_NAME, credentials=dict(con=CONNECTION)) + first = SQLTableDataSet(table_name=TABLE_NAME, credentials={"con": CONNECTION}) assert len(first.engines) == 1 second = SQLTableDataSet( - table_name="other_table", credentials=dict(con=CONNECTION) + table_name="other_table", credentials={"con": CONNECTION} ) assert len(second.engines) == 1 assert len(first.engines) == 1 @@ -232,13 +232,11 @@ def test_multiple_connections(self, mocker): only create one connection per db. """ mock_engine = mocker.patch("kedro_datasets.pandas.sql_dataset.create_engine") - first = SQLTableDataSet(table_name=TABLE_NAME, credentials=dict(con=CONNECTION)) + first = SQLTableDataSet(table_name=TABLE_NAME, credentials={"con": CONNECTION}) assert len(first.engines) == 1 second_con = f"other_{CONNECTION}" - second = SQLTableDataSet( - table_name=TABLE_NAME, credentials=dict(con=second_con) - ) + second = SQLTableDataSet(table_name=TABLE_NAME, credentials={"con": second_con}) assert len(second.engines) == 2 assert len(first.engines) == 2 @@ -254,7 +252,7 @@ def test_empty_query_error(self): r"Please provide a sql query or path to a sql query file\." ) with pytest.raises(DataSetError, match=pattern): - SQLQueryDataSet(sql="", filepath="", credentials=dict(con=CONNECTION)) + SQLQueryDataSet(sql="", filepath="", credentials={"con": CONNECTION}) def test_empty_con_error(self): """Check the error when instantiating with empty connection string""" @@ -263,7 +261,7 @@ def test_empty_con_error(self): r"a SQLAlchemy connection string" ) with pytest.raises(DataSetError, match=pattern): - SQLQueryDataSet(sql=SQL_QUERY, credentials=dict(con="")) + SQLQueryDataSet(sql=SQL_QUERY, credentials={"con": ""}) @pytest.mark.parametrize( "query_data_set, has_execution_options", @@ -319,7 +317,7 @@ def test_load_driver_missing(self, mocker): "kedro_datasets.pandas.sql_dataset.create_engine", side_effect=_err ) with pytest.raises(DataSetError, match=ERROR_PREFIX + "mysqlclient"): - SQLQueryDataSet(sql=SQL_QUERY, credentials=dict(con=CONNECTION)) + SQLQueryDataSet(sql=SQL_QUERY, credentials={"con": CONNECTION}) def test_invalid_module(self, mocker): """Test that if an unknown module/driver is encountered by SQLAlchemy @@ -330,7 +328,7 @@ def test_invalid_module(self, mocker): ) pattern = ERROR_PREFIX + r"Invalid module some\_module" with pytest.raises(DataSetError, match=pattern): - SQLQueryDataSet(sql=SQL_QUERY, credentials=dict(con=CONNECTION)) + SQLQueryDataSet(sql=SQL_QUERY, credentials={"con": CONNECTION}) def test_load_unknown_module(self, mocker): """Test that if an unknown module/driver is encountered by SQLAlchemy @@ -341,14 +339,14 @@ def test_load_unknown_module(self, mocker): ) pattern = ERROR_PREFIX + r"No module named \'unknown\_module\'" with pytest.raises(DataSetError, match=pattern): - SQLQueryDataSet(sql=SQL_QUERY, credentials=dict(con=CONNECTION)) + SQLQueryDataSet(sql=SQL_QUERY, credentials={"con": CONNECTION}) def test_load_unknown_sql(self): """Check the error when unknown SQL dialect is provided in the connection string""" pattern = r"The SQL dialect in your connection is not supported by SQLAlchemy" with pytest.raises(DataSetError, match=pattern): - SQLQueryDataSet(sql=SQL_QUERY, credentials=dict(con=FAKE_CONN_STR)) + SQLQueryDataSet(sql=SQL_QUERY, credentials={"con": FAKE_CONN_STR}) def test_save_error(self, query_data_set, dummy_dataframe): """Check the error when trying to save to the data set""" @@ -390,12 +388,12 @@ def test_create_connection_only_once(self, mocker): tables and execution options, for example) only create a connection once. """ mock_engine = mocker.patch("kedro_datasets.pandas.sql_dataset.create_engine") - first = SQLQueryDataSet(sql=SQL_QUERY, credentials=dict(con=CONNECTION)) + first = SQLQueryDataSet(sql=SQL_QUERY, credentials={"con": CONNECTION}) assert len(first.engines) == 1 # second engine has identical params to the first one # => no new engine should be created - second = SQLQueryDataSet(sql=SQL_QUERY, credentials=dict(con=CONNECTION)) + second = SQLQueryDataSet(sql=SQL_QUERY, credentials={"con": CONNECTION}) mock_engine.assert_called_once_with(CONNECTION) assert second.engines == first.engines assert len(first.engines) == 1 @@ -404,7 +402,7 @@ def test_create_connection_only_once(self, mocker): # => no new engine should be created third = SQLQueryDataSet( sql="a different query", - credentials=dict(con=CONNECTION), + credentials={"con": CONNECTION}, execution_options=EXECUTION_OPTIONS, ) assert mock_engine.call_count == 1 @@ -414,7 +412,7 @@ def test_create_connection_only_once(self, mocker): # fourth engine has a different connection string # => a new engine has to be created fourth = SQLQueryDataSet( - sql=SQL_QUERY, credentials=dict(con="an other connection string") + sql=SQL_QUERY, credentials={"con": "an other connection string"} ) assert mock_engine.call_count == 2 assert fourth.engines == first.engines