diff --git a/.gitignore b/.gitignore index 68bc17f..78d5334 100644 --- a/.gitignore +++ b/.gitignore @@ -77,6 +77,7 @@ target/ # Jupyter Notebook .ipynb_checkpoints +.virtual_documents # IPython profile_default/ @@ -158,3 +159,10 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +# Sphinx and documentation intermediates +examples/*.zip +examples/*.html +doc/_build +doc/auto_examples +doc/sg_execution_times.rst diff --git a/datamapplot/config.py b/datamapplot/config.py index e9f2350..955bd8b 100644 --- a/datamapplot/config.py +++ b/datamapplot/config.py @@ -1,7 +1,15 @@ -from warnings import warn +from collections.abc import Sequence +import inspect as ins import json -import platformdirs from pathlib import Path +import platformdirs +from typing import Any, Callable, cast, ParamSpec, TypeVar, Union +from warnings import warn + + +P = ParamSpec("P") +T = TypeVar("T") + DEFAULT_CONFIG = { "dpi": 100, @@ -9,6 +17,17 @@ "cdn_url": "unpkg.com", } + +class ConfigError(Exception): + + def __init__(self, message: str, parameter: ins.Parameter) -> None: + super().__init__(message) + self.parameter = parameter + + +UnconfigurableParameters = Sequence[str] + + class ConfigManager: """Configuration manager for the datamapplot package.""" @@ -19,13 +38,13 @@ def __new__(cls): cls._instance = super(ConfigManager, cls).__new__(cls) cls._instance._config = {} return cls._instance - + def __init__(self): - if self._instance is None: + if not self._config: self._config_dir = platformdirs.user_config_dir("datamapplot") self._config_file = Path(self._config_dir) / "config.json" self._config = DEFAULT_CONFIG.copy() - + self._ensure_config_file() self._load_config() @@ -33,13 +52,13 @@ def _ensure_config_file(self) -> None: """Create config directory and file if they don't exist.""" try: self._config_file.parent.mkdir(parents=True, 
exist_ok=True) - + if not self._config_file.exists(): with open(self._config_file, 'w') as f: json.dump(DEFAULT_CONFIG, f, indent=2) except Exception as e: warn(f"Error creating config file: {e}") - + def _load_config(self) -> None: """Load configuration from file.""" try: @@ -48,7 +67,7 @@ def _load_config(self) -> None: self._config.update(loaded_config) except Exception as e: warn(f"Error loading config file: {e}") - + def save(self) -> None: """Save current configuration to file.""" try: @@ -56,10 +75,10 @@ def save(self) -> None: json.dump(self._config, f, indent=2) except Exception as e: warn(f"Error saving config file: {e}") - + def __getitem__(self, key): return self._config[key] - + def __setitem__(self, key, value): self._config[key] = value @@ -69,4 +88,55 @@ def __delitem__(self, key): def __contains__(self, key): return key in self._config - \ No newline at end of file + def complete( + self, + fn_or_unc: Union[None, UnconfigurableParameters, Callable[P, T]] = None, + unconfigurable: UnconfigurableParameters = set(), + ) -> Union[Callable[[Callable[P, T]], Callable[P, T]], Callable[P, T]]: + def decorator(fn: Callable[P, T]) -> Callable[P, T]: + sig = ins.signature(fn) + + def fn_with_config(*args, **kwargs): + bound_args = sig.bind(*args, **kwargs) + bindings = bound_args.arguments + from_config = {} + for name, param in sig.parameters.items(): + if name not in bindings and name in self: + if not _is_admissible(param): + raise ConfigError( + "Only keyword (or plausibly keyword) parameters " + "can be set through the DataMapPlot configuration " + f"file. 
Parameter {param.name} ({param.kind}) " + "is thus not admissible.", + param + ) + if name in unconfigurable: + raise ConfigError( + f"Parameter {param.name} is deliberately listed as " + "forbidden from being defined through the DataMapPlot " + "configuration file.", + param + ) + from_config[name] = self[name] + return fn(*bound_args.args, **(bound_args.kwargs | from_config)) + + fn_with_config._gets_completed = True + return fn_with_config + + if fn_or_unc is None: + return decorator + elif not hasattr(fn_or_unc, "__call__"): + unconfigurable = cast(UnconfigurableParameters, fn_or_unc) + return decorator + return decorator(fn_or_unc) + + @staticmethod + def gets_completed(func) -> bool: + return hasattr(func, "_gets_completed") and func._gets_completed + + +_KINDS_ADMISSIBLE = {ins.Parameter.POSITIONAL_OR_KEYWORD, ins.Parameter.KEYWORD_ONLY} + + +def _is_admissible(param: ins.Parameter) -> bool: + return param.kind in _KINDS_ADMISSIBLE diff --git a/datamapplot/create_plots.py b/datamapplot/create_plots.py index 55f4bcd..9763c3c 100644 --- a/datamapplot/create_plots.py +++ b/datamapplot/create_plots.py @@ -2,7 +2,6 @@ import pandas as pd import textwrap import colorcet -import inspect from matplotlib import pyplot as plt from matplotlib.colors import to_rgb @@ -23,6 +22,10 @@ from datamapplot.config import ConfigManager +cfg = ConfigManager() + + +@cfg.complete(unconfigurable={"data_map_coords", "labels"}) def create_plot( data_map_coords, labels=None, @@ -172,19 +175,6 @@ def create_plot( The axes contained within the figure that the plot is rendered to. 
""" - function_signature = inspect.signature(create_plot) - function_args = locals() - config = ConfigManager() - - for param_name, param_value in function_signature.parameters.items(): - if param_name in ("data_map_coords", "labels"): - continue - - provided_value = function_args.get(param_name) - if provided_value == param_value.default: - if param_name in config: - function_args[param_name] = config[param_name] - if labels is None: label_locations = np.zeros((0, 2), dtype=np.float32) label_text = [] @@ -333,6 +323,7 @@ def create_plot( return fig, ax +@cfg.complete(unconfigurable={"data_map_coords", "label_layers", "hover_text"}) def create_interactive_plot( data_map_coords, *label_layers, @@ -472,19 +463,6 @@ def create_interactive_plot( ------- """ - function_signature = inspect.signature(create_interactive_plot) - function_args = locals() - config = ConfigManager() - - for param_name, param_value in function_signature.parameters.items(): - if param_name in ("data_map_coords", "label_layers", "hover_text"): - continue - - provided_value = function_args.get(param_name) - if provided_value is param_value.default: - if param_name in config: - function_args[param_name] = config[param_name] - if len(label_layers) == 0: label_dataframe = pd.DataFrame( { diff --git a/datamapplot/interactive_rendering.py b/datamapplot/interactive_rendering.py index 1548ec5..4427ede 100644 --- a/datamapplot/interactive_rendering.py +++ b/datamapplot/interactive_rendering.py @@ -6,7 +6,6 @@ import warnings import zipfile import json -import inspect import platformdirs import jinja2 @@ -33,6 +32,9 @@ from datamapplot.config import ConfigManager from datamapplot import offline_mode_caching + +cfg = ConfigManager() + _DECKGL_TEMPLATE_STR = (files("datamapplot") / "deckgl_template.html").read_text( encoding="utf-8" ) @@ -399,6 +401,7 @@ def label_text_and_polygon_dataframes( return pd.DataFrame(data) +@cfg.complete(unconfigurable={"point_dataframe", "label_dataframe"}) def render_html( 
point_dataframe, label_dataframe, @@ -718,19 +721,6 @@ def render_html( An interactive figure with hover, pan, and zoom. This will display natively in a notebook, and can be saved to an HTML file via the `save` method. """ - function_signature = inspect.signature(render_html) - function_args = locals() - config = ConfigManager() - - for param_name, param_value in function_signature.parameters.items(): - if param_name in ("point_dataframe", "label_dataframe"): - continue - - provided_value = function_args.get(param_name) - if provided_value is param_value.default: - if param_name in config: - function_args[param_name] = config[param_name] - # Compute point scaling n_points = point_dataframe.shape[0] if point_size_scale is not None: diff --git a/datamapplot/plot_rendering.py b/datamapplot/plot_rendering.py index 8514abe..eebe368 100644 --- a/datamapplot/plot_rendering.py +++ b/datamapplot/plot_rendering.py @@ -30,7 +30,9 @@ import requests import re -import inspect + + +cfg = ConfigManager() class GoogleAPIUnreachable(Warning): @@ -167,6 +169,15 @@ def add_glow_to_scatterplot( ) +@cfg.complete( + unconfigurable={ + "data_map_coords", + "color_list", + "label_text", + "label_locations", + "label_cluster_sizes", + } +) def render_plot( data_map_coords, color_list, @@ -452,25 +463,6 @@ def render_plot( The axes contained within the figure that the plot is rendered to. 
""" - function_signature = inspect.signature(render_plot) - function_args = locals() - config = ConfigManager() - - for param_name, param_value in function_signature.parameters.items(): - if param_name in ( - "data_map_coords", - "color_list", - "label_text", - "label_locations", - "label_cluster_sizes", - ): - continue - - provided_value = function_args.get(param_name) - if provided_value is param_value.default: - if param_name in config: - function_args[param_name] = config[param_name] - # Create the figure if ax is None: fig, ax = plt.subplots(figsize=figsize, dpi=dpi, constrained_layout=True) diff --git a/datamapplot/selection_handlers.py b/datamapplot/selection_handlers.py index 7782ec3..21e6a20 100644 --- a/datamapplot/selection_handlers.py +++ b/datamapplot/selection_handlers.py @@ -1,9 +1,11 @@ from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS import string -import inspect from datamapplot.config import ConfigManager + +cfg = ConfigManager() + _DEFAULT_TAG_COLORS = [ "#1f77b4", "#ff7f0e","#2ca02c","#d62728","#9467bd","#8c564b","#e377c2","#7f7f7f", "#bcbd22","#17becf","#a6008a","#656100","#8aa6ff","#007155","#ce968a","#6139f3", @@ -76,20 +78,8 @@ class DisplaySample(SelectionHandlerBase): """ + @cfg.complete(unconfigurable={"self", "n_samples"}) def __init__(self, n_samples=256, font_family=None, cdn_url="unpkg.com", **kwargs): - function_signature = inspect.signature(DisplaySample.__init__) - function_args = locals() - config = ConfigManager() - - for param_name, param_value in function_signature.parameters.items(): - if param_name in ("self", "n_samples"): - continue - - provided_value = function_args.get(param_name) - if provided_value == param_value.default: - if param_name in config: - function_args[param_name] = config[param_name] - super().__init__( dependencies=[ f"https://{cdn_url}/jquery@3.7.1/dist/jquery.min.js" @@ -277,6 +267,7 @@ class WordCloud(SelectionHandlerBase): """ + @cfg.complete(unconfigurable={"self", "width", "height", 
"n_words"}) def __init__( self, n_words=256, @@ -290,19 +281,6 @@ def __init__( cdn_url="unpkg.com", **kwargs, ): - function_signature = inspect.signature(WordCloud.__init__) - function_args = locals() - config = ConfigManager() - - for param_name, param_value in function_signature.parameters.items(): - if param_name in ("self", "width", "height", "n_words"): - continue - - provided_value = function_args.get(param_name) - if provided_value == param_value.default: - if param_name in config: - function_args[param_name] = config[param_name] - super().__init__( dependencies=[ f"https://{cdn_url}/d3@latest/dist/d3.min.js", @@ -474,6 +452,7 @@ class CohereSummary(SelectionHandlerBase): Additional keyword arguments to pass to the SelectionHandlerBase constructor. """ + @cfg.complete(unconfigurable={"self", "width", "n_keywords", "n_samples"}) def __init__( self, model="command-r", @@ -485,19 +464,6 @@ def __init__( cdn_url="unpkg.com", **kwargs, ): - function_signature = inspect.signature(CohereSummary.__init__) - function_args = locals() - config = ConfigManager() - - for param_name, param_value in function_signature.parameters.items(): - if param_name in ("self", "width", "n_keywords", "n_samples"): - continue - - provided_value = function_args.get(param_name) - if provided_value == param_value.default: - if param_name in config: - function_args[param_name] = config[param_name] - super().__init__( dependencies=[ f"https://{cdn_url}/jquery@3.7.1/dist/jquery.min.js", diff --git a/datamapplot/tests/test_config.py b/datamapplot/tests/test_config.py new file mode 100644 index 0000000..dd4d2cc --- /dev/null +++ b/datamapplot/tests/test_config.py @@ -0,0 +1,114 @@ +from copy import copy +import inspect as ins +from pathlib import Path +import platformdirs +import pytest + +from .. 
import create_plot, create_interactive_plot, render_plot, render_html +from ..config import ConfigManager, ConfigError +from ..selection_handlers import DisplaySample, WordCloud, CohereSummary + + +@pytest.fixture +def no_change_to_config_file(): + cfgmgr = ConfigManager() + assert cfgmgr._config_file.is_file() + contents_before = cfgmgr._config_file.read_bytes() + try: + yield None + finally: + contents_after = cfgmgr._config_file.read_bytes() + if contents_after != contents_before: + cfgmgr._config_file.write_bytes(contents_before) + pytest.fail( + "Unit test was supposed not to change the configuration file, " + "yet it did." + ) + + +def test_tweak_config_sanity(no_change_to_config_file): + cfgmgr = ConfigManager() + cfgmgr["asdf"] = "qwer" + + +@pytest.fixture +def config(no_change_to_config_file): + config = ConfigManager() + orig = copy(config._config) + yield config + config._config = orig + + +@pytest.fixture +def the_func(config): + for name in ["a", "args", "b", "c", "dont_touch", "kwargs"]: + assert name not in config + + @config.complete({"dont_touch"}) + def _the_func(a, *args, b=None, c="asdf", dont_touch="nope", **kwargs): + return a, args, b, c, dont_touch, kwargs + return _the_func + + +def test_no_config_args(the_func, config): + config["args"] = ("heck", "no") + with pytest.raises(ConfigError): + the_func("A") + + +def test_no_config_kwargs(the_func, config): + config["kwargs"] = {"heck": "no"} + with pytest.raises(ConfigError): + the_func("A") + + +def test_config_positional_useless(the_func, config): + config["a"] = "how would that even work?" # Can never reach. 
+ assert the_func("A") == ("A", (), None, "asdf", "nope", {}) + + +def test_fetch_b_config(the_func, config): + config["b"] = 98 + assert the_func("A") == ("A", (), 98, "asdf", "nope", {}) + + +def test_override_configed_b(the_func, config): + config["b"] = 98 + assert the_func("A", "B", b=3) == ("A", ("B",), 3, "asdf", "nope", {}) + + +def test_nonconfiged_c(the_func, config): + config["b"] = 98 + assert the_func("A", c="qwer") == ("A", (), 98, "qwer", "nope", {}) + + +def test_no_config_donttouch(the_func, config): + config["dont_touch"] = "this mustn't work" + with pytest.raises(ConfigError): + the_func("A") + + +def test_override_donttouch(the_func): + assert the_func("A", dont_touch="poke") == ("A", (), None, "asdf", "poke", {}) + + +@pytest.mark.parametrize( + "func", + [ + create_plot, + create_interactive_plot, + render_plot, + render_html, + DisplaySample.__init__, + WordCloud.__init__, + CohereSummary.__init__ + ] +) +def test_has_config(func): + assert ConfigManager.gets_completed(func) + + +def test_sanity_config_display_sample(config): + assert DisplaySample().font_family is None + config["font_family"] = "Roboto" + assert DisplaySample().font_family == "Roboto" diff --git a/doc/configuration.ipynb b/doc/configuration.ipynb new file mode 100644 index 0000000..2ff515d --- /dev/null +++ b/doc/configuration.ipynb @@ -0,0 +1,199 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2285300b-3bc1-4004-9d55-90114ce84df6", + "metadata": {}, + "source": [ + "# Persistent configuration" + ] + }, + { + "cell_type": "markdown", + "id": "f0a0ae95-a85e-4231-ad3d-34714ec5395f", + "metadata": {}, + "source": [ + "Both static and interactive plots produced through DataMapPlots can be customized through many settings.\n", + "When such customizations become more numerous and pervasive through one's usage,\n", + "copy-pasting the settings in every invocation of `create_plot` and friends can become cumbersome.\n", + "The solution to this problem is to write these 
common custom settings up in the **DataMapPlot configuration file**." + ] + }, + { + "cell_type": "markdown", + "id": "d2162b13-1f78-4cca-9b6e-f986530fdebe", + "metadata": {}, + "source": [ + "When DataMapPlot is first imported in any Python code, it creates this configuration file in the \"most appropriate place\" given one's computing platform." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "8ca503e7-709a-4cea-af8e-f60dcf0e6cd6", + "metadata": {}, + "outputs": [], + "source": [ + "import datamapplot" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "405e05c0-6c42-43ef-8a48-c079880e2fb2", + "metadata": {}, + "outputs": [], + "source": [ + "from datamapplot.config import ConfigManager\n", + "cfg = ConfigManager()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8f64cb6e-c086-4a5b-8590-ff7569cf4e23", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ".config/datamapplot/config.json\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "print(ConfigManager()._config_file.relative_to(Path.home()))" + ] + }, + { + "cell_type": "markdown", + "id": "cdeb7e18-ed88-4877-9ee3-d84859b52bf6", + "metadata": {}, + "source": [ + "The default configuration file includes minimal settings that echo our users' typical purposes for bending away from the default parameter values of the main DataMapPlot routines."
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "e78744cd-a6e9-4e5f-872e-05b05ec262b2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"dpi\": 100,\n", + " \"figsize\": [\n", + " 10,\n", + " 10\n", + " ],\n", + " \"cdn_url\": \"unpkg.com\",\n", + " \"font_family\": \"Roboto\"\n", + "}\n" + ] + } + ], + "source": [ + "print(cfg._config_file.read_text())" + ] + }, + { + "cell_type": "markdown", + "id": "6e9b2f3a-68f5-49cd-ab51-668509eef624", + "metadata": {}, + "source": [ + "Most parameters that carry a default value for `create_plot`, `create_interactive_plot` and other interface routines can be set by editing this JSON file.\n", + "Yet, some of the most basic plot ingredients cannot be set through this file.\n", + "An attempt to do so will raise a `ConfigError` exception when creating a plot." + ] + }, + { + "cell_type": "markdown", + "id": "ec9dcbce-94c5-4494-8e71-f2bf1f42c1a4", + "metadata": {}, + "source": [ + "## Setting persistent configuration through Python code" + ] + }, + { + "cell_type": "markdown", + "id": "7294dcd3-e43d-4eca-92fa-00c2ddcfc5b1", + "metadata": {}, + "source": [ + "`ConfigManager` is a [singleton class](https://www.patterns.dev/vanilla/singleton-pattern/) that supports getting and setting items by key, just like a dictionary.\n", + "Thus, custom settings can be set through Python code."
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "7a9d45f9-2e10-4186-be87-51dffd1bf795", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "100\n" + ] + } + ], + "source": [ + "print(cfg[\"dpi\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "0deeb77d-0b16-4742-a718-abf653e59cb4", + "metadata": {}, + "outputs": [], + "source": [ + "cfg[\"font_family\"] = \"Roboto\"" + ] + }, + { + "cell_type": "markdown", + "id": "82bc72f0-1f91-4b7f-a649-7b7ab6c62b9a", + "metadata": {}, + "source": [ + "These configuration settings can be made to persist between sessions by saving to the configuration file." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "86c2c736-699e-4fd4-bb53-241ddc9c3e62", + "metadata": {}, + "outputs": [], + "source": [ + "cfg.save()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/index.rst b/doc/index.rst index 3781554..80c0ab2 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -56,8 +56,9 @@ and followed by guides on basic usage, through to the more complicated options a .. toctree:: :maxdepth: 1 - :caption: Gallery, API and FAQ: + :caption: Configuration, gallery, API and FAQ: + configuration auto_examples/index api faq