revert: Remove all tag based features
dangotbanned committed Jan 13, 2025
1 parent 6035b39 commit 5d8b6db
Showing 4 changed files with 27 additions and 126 deletions.
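In effect, the revert strips the ``tag``/``Version`` parameter from every public call site, so datasets always resolve against the single pinned vega-datasets version. A minimal before/after sketch of the public API (dataset name chosen for illustration only):

    from altair.datasets import Loader

    data = Loader.from_backend("polars")

    # Before this commit, a release tag could be pinned per call:
    #     cars = data("cars", tag="v2.9.0")

    # After the revert there is nothing to select per call:
    cars = data("cars")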
9 changes: 2 additions & 7 deletions altair/datasets/__init__.py
@@ -14,7 +14,7 @@
 from typing_extensions import LiteralString
 
 from altair.datasets._loader import _Load
-from altair.datasets._typing import Dataset, Extension, Version
+from altair.datasets._typing import Dataset, Extension
 
 
 __all__ = ["Loader", "load", "url"]
@@ -47,7 +47,6 @@ def url(
     name: Dataset | LiteralString,
     suffix: Extension | None = None,
     /,
-    tag: Version | None = None,
 ) -> str:
     """
     Return the address of a remote dataset.
@@ -61,15 +60,11 @@
         .. note::
             Only needed if ``name`` is available in multiple formats.
-    tag
-        Version identifier for a `vega-datasets release`_.
 
     .. _Path.stem:
         https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.stem
     .. _Path.suffix:
         https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.suffix
-    .. _vega-datasets release:
-        https://github.com/vega/vega-datasets/releases
 
     Related
     -------
@@ -83,7 +78,7 @@ def url(
     try:
         from altair.datasets._loader import load
 
-        url = load.url(name, suffix, tag=tag)
+        url = load.url(name, suffix)
     except AltairDatasetsError:
         from altair.datasets._cache import url_cache
 
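The hunk above cuts off before the fallback branch completes, but the intent is visible: with a backend installed, ``url()`` resolves through the loader; otherwise the ``AltairDatasetsError`` handler falls back to the bundled ``url_cache`` lookup. A hedged usage sketch (no assumption about which path runs):

    from altair.datasets import url

    # Works with or without a DataFrame backend installed; when the
    # loader is unavailable, the fallback above serves a cached url.
    cars_url = url("cars")
    print(cars_url)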
16 changes: 4 additions & 12 deletions altair/datasets/_cache.py
@@ -8,8 +8,6 @@
 import narwhals.stable.v1 as nw
 from narwhals.stable.v1.typing import IntoDataFrameT, IntoFrameT
 
-from altair.datasets._typing import VERSION_LATEST
-
 if sys.version_info >= (3, 12):
     from typing import Protocol
 else:
@@ -105,10 +103,7 @@ class UrlCache(CompressedCache[_KT, _VT]):
     `csv`_, `gzip`_ -based, lazy url lookup.
 
     Operates on a subset of available datasets:
-    - Only the latest version
     - Excludes `.parquet`, which `cannot be read via url`_
     - Name collisions are pre-resolved
         - Only provide the smallest (e.g. ``weather.json`` instead of ``weather.csv``)
 
     .. _csv:
         https://docs.python.org/3/library/csv.html
@@ -256,13 +251,10 @@ def download_all(self) -> None:
         Requires **30-50MB** of disk-space.
         """
         stems = tuple(fp.stem for fp in self)
-        latest = nw.col("tag") == nw.lit(VERSION_LATEST)
-        predicates = (~(nw.col("sha").is_in(stems)), latest) if stems else (latest,)
+        predicates = (~(nw.col("sha").is_in(stems)),) if stems else ()
         frame = (
-            self._rd._scan_metadata(
-                predicates, ext_supported=True, name_collision=False
-            )
-            .select("sha", "suffix", "url_npm")
+            self._rd._scan_metadata(predicates, is_image=False)  # type: ignore
+            .select("sha", "suffix", "url")
             .unique("sha")
             .collect()
         )
@@ -272,7 +264,7 @@
             print(f"Downloading {len(frame)} missing datasets...")
         for row in frame.iter_rows(named=True):
             fp: Path = self.path / (row["sha"] + row["suffix"])
-            with self._rd._opener.open(row["url_npm"]) as f:
+            with self._rd._opener.open(row["url"]) as f:
                 fp.touch()
                 fp.write_bytes(f.read())
         print("Finished downloads")
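For context, a sketch of how ``download_all`` is reached from the public API; it assumes the ``Loader.cache`` property shown in ``_loader.py`` below exposes this ``DatasetCache`` method:

    from altair.datasets import Loader

    data = Loader.from_backend("polars")

    # Pre-fetches every dataset not already cached (~30-50MB on disk);
    # files already present are skipped via the ``sha`` predicate built
    # from the existing file stems.
    data.cache.download_all()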
39 changes: 12 additions & 27 deletions altair/datasets/_loader.py
@@ -21,7 +21,7 @@
 else:
     from typing_extensions import LiteralString
     from altair.datasets._readers import _Backend
-    from altair.datasets._typing import Dataset, Extension, Version
+    from altair.datasets._typing import Dataset, Extension
 
 
 __all__ = ["Loader", "load"]
@@ -111,7 +111,7 @@ def from_backend(cls, backend_name: _Backend, /) -> Loader[Any, Any]:
         Using ``pandas``, backed by ``pyarrow`` dtypes:
 
             data = Loader.from_backend("pandas[pyarrow]")
-            cars = data("cars", tag="v1.29.0")
+            cars = data("cars")
 
         >>> type(cars)  # doctest: +SKIP
         pandas.core.frame.DataFrame
@@ -137,7 +136,6 @@ def __call__(
         name: Dataset | LiteralString,
         suffix: Extension | None = None,
         /,
-        tag: Version | None = None,
         **kwds: Any,
     ) -> IntoDataFrameT:
         """
@@ -152,17 +151,13 @@
             .. note::
                 Only needed if ``name`` is available in multiple formats.
-        tag
-            Version identifier for a `vega-datasets release`_.
         **kwds
             Arguments passed to the underlying read function.
 
         .. _Path.stem:
             https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.stem
         .. _Path.suffix:
             https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.suffix
-        .. _vega-datasets release:
-            https://github.com/vega/vega-datasets/releases
 
         Examples
         --------
@@ -171,7 +166,7 @@
             from altair.datasets import Loader
 
             data = Loader.from_backend("polars")
-            source = data("iowa-electricity", tag="v2.10.0")
+            source = data("iowa-electricity")
 
         >>> source.columns  # doctest: +SKIP
         ['year', 'source', 'net_generation']
@@ -199,7 +194,7 @@ def __call__(
         Using ``pandas``:
 
             data = Loader.from_backend("pandas")
-            source = data("iowa-electricity", tag="v2.10.0")
+            source = data("iowa-electricity")
 
         >>> source.columns  # doctest: +SKIP
         Index(['year', 'source', 'net_generation'], dtype='object')
@@ -223,7 +218,7 @@
         Using ``pyarrow``:
 
             data = Loader.from_backend("pyarrow")
-            source = data("iowa-electricity", tag="v2.10.0")
+            source = data("iowa-electricity")
 
         >>> source.column_names  # doctest: +SKIP
         ['year', 'source', 'net_generation']
@@ -238,14 +233,13 @@
         source: [["Fossil Fuels","Fossil Fuels","Fossil Fuels","Fossil Fuels","Fossil Fuels",...,"Renewables","Renewables","Renewables","Renewables","Renewables"]]
         net_generation: [[35361,35991,36234,36205,36883,...,16476,17452,19091,21241,21933]]
         """
-        return self._reader.dataset(name, suffix, tag=tag, **kwds)
+        return self._reader.dataset(name, suffix, **kwds)
 
     def url(
         self,
         name: Dataset | LiteralString,
         suffix: Extension | None = None,
         /,
-        tag: Version | None = None,
     ) -> str:
         """
         Return the address of a remote dataset.
@@ -259,15 +253,11 @@
             .. note::
                 Only needed if ``name`` is available in multiple formats.
-        tag
-            Version identifier for a `vega-datasets release`_.
 
         .. _Path.stem:
             https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.stem
         .. _Path.suffix:
             https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.suffix
-        .. _vega-datasets release:
-            https://github.com/vega/vega-datasets/releases
 
         Examples
         --------
@@ -277,15 +267,15 @@
             from altair.datasets import Loader
 
             data = Loader.from_backend("polars")
 
-        >>> data.url("cars", tag="v2.9.0")  # doctest: +SKIP
-        'https://cdn.jsdelivr.net/npm/vega-datasets@v2.9.0/data/cars.json'
+        >>> data.url("cars")  # doctest: +SKIP
+        'https://cdn.jsdelivr.net/npm/vega-datasets@v2.11.0/data/cars.json'
 
         We can pass the result directly to a chart:
 
-            url = data.url("cars", tag="v2.9.0")
+            url = data.url("cars")
             alt.Chart(url).mark_point().encode(x="Horsepower:Q", y="Miles_per_Gallon:Q")
         """
-        return self._reader.url(name, suffix, tag=tag)
+        return self._reader.url(name, suffix)
 
     @property
     def cache(self) -> DatasetCache[IntoDataFrameT, IntoFrameT]:
@@ -318,7 +308,6 @@ def __call__(  # pyright: ignore[reportOverlappingOverload]
         name: Dataset | LiteralString,
         suffix: Extension | None = ...,
         /,
-        tag: Version | None = ...,
         backend: None = ...,
         **kwds: Any,
     ) -> IntoDataFrameT: ...
@@ -328,7 +317,6 @@ def __call__(
         name: Dataset | LiteralString,
         suffix: Extension | None = ...,
         /,
-        tag: Version | None = ...,
         backend: Literal["polars"] = ...,
         **kwds: Any,
     ) -> pl.DataFrame: ...
@@ -338,7 +326,6 @@ def __call__(
         name: Dataset | LiteralString,
         suffix: Extension | None = ...,
         /,
-        tag: Version | None = ...,
         backend: Literal["pandas", "pandas[pyarrow]"] = ...,
         **kwds: Any,
     ) -> pd.DataFrame: ...
@@ -348,7 +335,6 @@ def __call__(
         name: Dataset | LiteralString,
         suffix: Extension | None = ...,
         /,
-        tag: Version | None = ...,
         backend: Literal["pyarrow"] = ...,
         **kwds: Any,
     ) -> pa.Table: ...
@@ -357,14 +343,13 @@
         name: Dataset | LiteralString,
         suffix: Extension | None = None,
         /,
-        tag: Version | None = None,
         backend: _Backend | None = None,
         **kwds: Any,
     ) -> IntoDataFrameT | pl.DataFrame | pd.DataFrame | pa.Table:
         if backend is None:
-            return super().__call__(name, suffix, tag, **kwds)
+            return super().__call__(name, suffix, **kwds)
         else:
-            return self.from_backend(backend)(name, suffix, tag=tag, **kwds)
+            return self.from_backend(backend)(name, suffix, **kwds)
 
 
 load: _Load[Any, Any]
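The overload block above is what narrows the return type; a short sketch of the two dispatch paths after the revert (assumes the named backends are installed):

    from altair.datasets import load

    # backend=None routes through super().__call__ and returns the
    # default backend's frame type (IntoDataFrameT).
    cars_default = load("cars")

    # An explicit backend dispatches via Loader.from_backend, so the
    # overloads resolve the return type (here: pandas.DataFrame).
    cars_pandas = load("cars", backend="pandas")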