Skip to content

Commit

Permalink
fix(datasets): Revert the optional dependencies back to setup.py (#310)
Browse files Browse the repository at this point in the history
Move optional dependencies for kedro-datasets to setup.py

Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>
Co-authored-by: Juan Luis Cano Rodríguez <juan_luis_cano@mckinsey.com>
  • Loading branch information
DimedS and astrojuanlu authored Aug 17, 2023
1 parent d109b32 commit b9f990d
Show file tree
Hide file tree
Showing 2 changed files with 222 additions and 234 deletions.
246 changes: 12 additions & 234 deletions kedro-datasets/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,238 +5,16 @@ build-backend = "setuptools.build_meta"
[project]
name = "kedro-datasets"
authors = [
{name = "Kedro"}
{name = "Kedro"}
]
description = "Kedro-Datasets is where you can find all of Kedro's data connectors."
requires-python = ">=3.7, <3.11"
license = {text = "Apache Software License (Apache 2.0)"}
dependencies = [
"kedro>=0.16",
"lazy_loader",
]
dynamic = ["readme", "version"]

[project.optional-dependencies]
# Base dependencies
hdfs-base = ["hdfs>=2.5.8, <3.0"]
pandas-base = ["pandas>=1.3, <3.0"]
plotly-base = ["plotly>=4.8.0, <6.0"]
s3fs-base = ["s3fs>=0.3.0, <0.5"]
spark-base = ["pyspark>=2.2, <4.0"]


# Datasets dependencies
api = ["kedro-datasets[api.APIDataSet]"]
"api.APIDataSet" = ["requests~=2.20"]

biosequence = ["kedro-datasets[biosequence.BioSequenceDataSet]"]
"biosequence.BioSequenceDataSet" = ["biopython~=1.73"]

dask = ["kedro-datasets[dask.ParquetDataSet]"]
"dask.ParquetDataSet" = ["dask[complete]>=2021.10", "triad>=0.6.7, <1.0"]

databricks = ["kedro-datasets[databricks.ManagedTableDataSet]"]
"databricks.ManagedTableDataSet" = ["kedro-datasets[spark-base,pandas-base]", "delta-spark~=1.2.1"]

geopandas = ["kedro-datasets[geopandas.GeoJSONDataSet]"]
"geopandas.GeoJSONDataSet" = ["geopandas>=0.6.0, <1.0", "pyproj~=3.0"]

holoviews = ["kedro-datasets[holoviews.HoloviewsWriter]"]
"holoviews.HoloviewsWriter" = ["holoviews~=1.13.0"]

matplotlib = ["kedro-datasets[matplotlib.MatplotlibWriter]"]
"matplotlib.MatplotlibWriter" = ["matplotlib>=3.0.3, <4.0"]

networkx = ["kedro-datasets[networkx.NetworkXDataSet]"]
"networkx.NetworkXDataSet" = ["networkx~=2.4"]

pandas = [
"""kedro-datasets[\
pandas.CSVDataSet,\
pandas.ExcelDataSet,\
pandas.DeltaTableDataSet,\
pandas.FeatherDataSet,\
pandas.GenericDataSet,\
pandas.GBQTableDataSet,\
pandas.GBQQueryDataSet,\
pandas.HDFDataSet,\
pandas.JSONDataSet,\
pandas.ParquetDataSet,\
pandas.SQLTableDataSet,\
pandas.SQLQueryDataSet,\
pandas.XMLDataSet\
]"""
]
"pandas.CSVDataSet" = ["kedro-datasets[pandas-base]"]
"pandas.ExcelDataSet" = ["kedro-datasets[pandas-base]", "openpyxl>=3.0.6, <4.0"]
"pandas.DeltaTableDataSet" = ["kedro-datasets[pandas-base]", "deltalake>=0.10.0"]
"pandas.FeatherDataSet" = ["kedro-datasets[pandas-base]"]
"pandas.GenericDataSet" = ["kedro-datasets[pandas-base]"]
"pandas.GBQTableDataSet" = [
"kedro-datasets[pandas-base]",
"pandas-gbq>=0.12.0, <0.18.0"
]
"pandas.GBQQueryDataSet" = [
"kedro-datasets[pandas-base]",
"pandas-gbq>=0.12.0, <0.18.0"
]
"pandas.HDFDataSet" = [
"kedro-datasets[pandas-base]",
"tables~=3.6.0; platform_system == 'Windows'",
"tables~=3.6; platform_system != 'Windows'"
]
"pandas.JSONDataSet" = ["kedro-datasets[pandas-base]"]
"pandas.ParquetDataSet" = ["kedro-datasets[pandas-base]", "pyarrow>=6.0"]
"pandas.SQLTableDataSet" = ["kedro-datasets[pandas-base]", "SQLAlchemy>=1.4, <3.0"]
"pandas.SQLQueryDataSet" = [
"kedro-datasets[pandas-base]",
"SQLAlchemy>=1.4, <3.0",
"pyodbc~=4.0"
]
"pandas.XMLDataSet" = ["kedro-datasets[pandas-base]", "lxml~=4.6"]

pickle = ["kedro-datasets[pickle.PickleDataSet]"]
"pickle.PickleDataSet" = ["compress-pickle[lz4]~=2.1.0"]

pillow = ["kedro-datasets[pillow.ImageDataSet]"]
"pillow.ImageDataSet" = ["Pillow~=9.0"]

plotly = ["kedro-datasets[plotly.PlotlyDataSet,plotly.JSONDataSet]"]
"plotly.JSONDataSet" = ["kedro-datasets[plotly-base]"]
"plotly.PlotlyDataSet" = ["kedro-datasets[pandas-base,plotly-base]"]

polars = ["kedro-datasets[polars.CSVDataSet]"]
"polars.CSVDataSet" = ["polars~=0.17.0"]

redis = ["kedro-datasets[redis.PickleDataSet]"]
"redis.PickleDataSet" = ["redis~=4.1"]

snowflake = ["kedro-datasets[snowflake.SnowparkTableDataSet]"]
"snowflake.SnowparkTableDataSet" = [
"snowflake-snowpark-python~=1.0.0; python_version == '3.8'",
"pyarrow~=8.0"
]

spark = [
"kedro-datasets[spark.DeltaTableDataSet,spark.SparkDataSet,spark.SparkHiveDataSet,spark.SparkJDBCDataSet]"
]
"spark.DeltaTableDataSet" = [
"kedro-datasets[spark-base,hdfs-base,s3fs-base]",
"delta-spark>=1.0, <3.0"
]
"spark.SparkDataSet" = ["kedro-datasets[spark-base,hdfs-base,s3fs-base]"]
"spark.SparkHiveDataSet" = ["kedro-datasets[spark-base,hdfs-base,s3fs-base]"]
"spark.SparkJDBCDataSet" = ["kedro-datasets[spark-base,hdfs-base,s3fs-base]"]

svmlight = ["kedro-datasets[svmlight.SVMLightDataSet]"]
"svmlight.SVMLightDataSet" = ["scikit-learn~=1.0.2", "scipy~=1.7.3"]

tensorflow = ["kedro-datasets[tensorflow.TensorFlowModelDataSet]"]
"tensorflow.TensorFlowModelDataSet" = [
# currently only TensorFlow V2 supported for saving and loading.
# V1 requires HDF5 and serialises differently
"tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'",
# https://developer.apple.com/metal/tensorflow-plugin/
"tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'"
]

video = ["kedro-datasets[video.VideoDataSet]"]
"video.VideoDataSet" = ["opencv-python~=4.5.5.64"]

yaml = ["kedro-datasets[yaml.YAMLDataSet]"]
"yaml.YAMLDataSet" = ["kedro-datasets[pandas-base]", "PyYAML>=4.2, <7.0"]

all = [
"""kedro-datasets[\
api,biosequence,dask,databricks,\
geopandas,holoviews,matplotlib,\
networkx,pickle,pillow,plotly,\
polars,snowflake,redis,spark,svmlight,\
tensorflow,video,yaml\
]"""
]
docs = [
# docutils>=0.17 changed the HTML
# see https://github.com/readthedocs/sphinx_rtd_theme/issues/1115
"docutils==0.16",
"sphinx~=5.3.0",
"sphinx_rtd_theme==1.2.0",
# Regression on sphinx-autodoc-typehints 1.21
# that creates some problematic docstrings
"sphinx-autodoc-typehints==1.20.2",
"sphinx_copybutton==0.3.1",
"sphinx-notfound-page",
"ipykernel>=5.3, <7.0",
"sphinxcontrib-mermaid~=0.7.1",
"myst-parser~=1.0.0",
"Jinja2<3.1.0"
]
test = [
"adlfs>=2021.7.1, <=2022.2",
"bandit>=1.6.2, <2.0",
"behave==1.2.6",
"biopython~=1.73",
"blacken-docs==1.9.2",
"black~=22.0",
"compress-pickle[lz4]~=1.2.0",
"coverage[toml]",
"dask[complete]",
"delta-spark~=1.2.1",
"deltalake>=0.10.0",
# 1.2.0 has a bug that breaks some of our tests: https://github.com/delta-io/delta/issues/1070
"dill~=0.3.1",
"filelock>=3.4.0, <4.0",
"gcsfs>=2021.4, <=2022.1",
"geopandas>=0.6.0, <1.0",
"hdfs>=2.5.8, <3.0",
"holoviews~=1.13.0",
"import-linter[toml]==1.2.6",
"ipython>=7.31.1, <8.0",
"Jinja2<3.1.0",
"joblib>=0.14",
"jupyterlab~=3.0",
"jupyter~=1.0",
"lxml~=4.6",
"matplotlib>=3.0.3, <3.4; python_version < '3.10'", # 3.4.0 breaks holoviews
"matplotlib>=3.5, <3.6; python_version == '3.10'",
"memory_profiler>=0.50.0, <1.0",
"moto==1.3.7; python_version < '3.10'",
"moto==3.0.4; python_version == '3.10'",
"networkx~=2.4",
"opencv-python~=4.5.5.64",
"openpyxl>=3.0.3, <4.0",
"pandas-gbq>=0.12.0, <0.18.0",
"pandas>=1.3, <2", # 1.3 for read_xml/to_xml, <2 for compatibility with Spark < 3.4
"Pillow~=9.0",
"plotly>=4.8.0, <6.0",
"polars~=0.15.13",
"pre-commit>=2.9.2, <3.0", # The hook `mypy` requires pre-commit version 2.9.2.
"psutil==5.8.0",
"pyarrow~=8.0",
"pylint>=2.5.2, <3.0",
"pyodbc~=4.0.35",
"pyproj~=3.0",
"pyspark>=2.2, <4.0",
"pytest-cov~=3.0",
"pytest-mock>=1.7.1, <2.0",
"pytest-xdist[psutil]~=2.2.1",
"pytest~=7.2",
"redis~=4.1",
"requests-mock~=1.6",
"requests~=2.20",
"s3fs>=0.3.0, <0.5", # Needs to be at least 0.3.0 to make use of `cachable` attribute on S3FileSystem.
"scikit-learn~=1.0.2",
"scipy~=1.7.3",
"snowflake-snowpark-python~=1.0.0; python_version == '3.8'",
"SQLAlchemy>=1.4, <3.0",
# The `Inspector.has_table()` method replaces the `Engine.has_table()` method in version 1.4.
"tables~=3.7",
"tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'",
"tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'",
"triad>=0.6.7, <1.0",
"trufflehog~=2.1",
"xlsxwriter~=1.0"
"kedro>=0.16",
"lazy_loader",
]
dynamic = ["readme", "version", "optional-dependencies"]

[project.urls]
Source = "https://github.com/kedro-org/kedro-plugins/tree/main/kedro-datasets"
Expand All @@ -257,18 +35,18 @@ profile = "black"
[tool.pylint.master]
ignore = "CVS"
load-plugins = [
"pylint.extensions.docparams",
"pylint.extensions.no_self_use"
"pylint.extensions.docparams",
"pylint.extensions.no_self_use"
]
extension-pkg-whitelist = "cv2"
unsafe-load-any-extension = false

[tool.pylint.messages_control]
disable = [
"ungrouped-imports",
"duplicate-code",
"too-many-instance-attributes",
"too-few-public-methods", # https://github.com/pylint-dev/pylint/issues/8865
"ungrouped-imports",
"duplicate-code",
"too-many-instance-attributes",
"too-few-public-methods", # https://github.com/pylint-dev/pylint/issues/8865
]
enable = ["useless-suppression"]

Expand All @@ -283,8 +61,8 @@ indent-string = " "

[tool.pylint.miscellaneous]
notes = [
"FIXME",
"XXX"
"FIXME",
"XXX"
]

[tool.pylint.design]
Expand Down
Loading

0 comments on commit b9f990d

Please sign in to comment.