From 905dee5f605740292cd97c1b00afe4363b71b72e Mon Sep 17 00:00:00 2001 From: GeoJulien Date: Tue, 25 Jun 2024 12:52:53 +0200 Subject: [PATCH 1/5] feature(options): add cache_dir option --- mkdocs_rss_plugin/config.py | 4 ++++ mkdocs_rss_plugin/constants.py | 1 + mkdocs_rss_plugin/plugin.py | 10 ++++++---- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/mkdocs_rss_plugin/config.py b/mkdocs_rss_plugin/config.py index 89dd61e..b405b33 100644 --- a/mkdocs_rss_plugin/config.py +++ b/mkdocs_rss_plugin/config.py @@ -8,6 +8,9 @@ from mkdocs.config import config_options from mkdocs.config.base import Config +# package +from mkdocs_rss_plugin.constants import DEFAULT_CACHE_FOLDER + # ############################################################################ # ########## Classes ############### # ################################## @@ -42,6 +45,7 @@ class RssPluginConfig(Config): categories = config_options.Optional( config_options.ListOfItems(config_options.Type(str)) ) + cache_dir = config_options.Type(str, default=f"{DEFAULT_CACHE_FOLDER.resolve()}") comments_path = config_options.Optional(config_options.Type(str)) date_from_meta = config_options.SubConfig(_DateFromMeta) enabled = config_options.Type(bool, default=True) diff --git a/mkdocs_rss_plugin/constants.py b/mkdocs_rss_plugin/constants.py index 07a8ddb..971802b 100644 --- a/mkdocs_rss_plugin/constants.py +++ b/mkdocs_rss_plugin/constants.py @@ -14,6 +14,7 @@ # ########## Globals ############# # ################################ +DEFAULT_CACHE_FOLDER = Path(".cache/plugins/rss") DEFAULT_TEMPLATE_FOLDER = Path(__file__).parent / "templates" DEFAULT_TEMPLATE_FILENAME = DEFAULT_TEMPLATE_FOLDER / "rss.xml.jinja2" MKDOCS_LOGGER_NAME = "[RSS-plugin]" diff --git a/mkdocs_rss_plugin/plugin.py b/mkdocs_rss_plugin/plugin.py index 9faff59..b672e11 100644 --- a/mkdocs_rss_plugin/plugin.py +++ b/mkdocs_rss_plugin/plugin.py @@ -92,6 +92,11 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig: 
self.config.enabled = False return config + # cache dir + self.cache_dir = Path(self.config.cache_dir) + self.cache_dir.mkdir(parents=True, exist_ok=True) + logger.debug(f"Caching HTTP requests to: {self.cache_dir.resolve()}") + # integrations - check if theme is Material and if social cards are enabled self.integration_material_social_cards = IntegrationMaterialSocialCards( mkdocs_config=config, @@ -100,6 +105,7 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig: # instantiate plugin tooling self.util = Util( + cache_dir=self.cache_dir, use_git=self.config.use_git, integration_material_social_cards=self.integration_material_social_cards, ) @@ -169,10 +175,6 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig: self.config.date_from_meta.default_time = datetime.strptime( self.config.date_from_meta.default_time, "%H:%M" ) - print( - self.config.date_from_meta.default_time, - type(self.config.date_from_meta.default_time), - ) except (TypeError, ValueError) as err: logger.warning( "Config error: `date_from_meta.default_time` value " From 89cd6624864382b7aa1c00f2aac8f5e5b8880ce7 Mon Sep 17 00:00:00 2001 From: GeoJulien Date: Tue, 25 Jun 2024 15:30:38 +0200 Subject: [PATCH 2/5] feature(http): use caching control to improve HTTP requests performance --- mkdocs_rss_plugin/util.py | 18 +++++++++++++++--- requirements/base.txt | 2 +- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/mkdocs_rss_plugin/util.py b/mkdocs_rss_plugin/util.py index 812a546..dc95109 100644 --- a/mkdocs_rss_plugin/util.py +++ b/mkdocs_rss_plugin/util.py @@ -19,6 +19,8 @@ # 3rd party import markdown import urllib3 +from cachecontrol import CacheControl +from cachecontrol.caches.file_cache import SeparateBodyFileCache from git import ( GitCommandError, GitCommandNotFound, @@ -34,7 +36,11 @@ from requests.exceptions import ConnectionError, HTTPError # package -from mkdocs_rss_plugin.constants import MKDOCS_LOGGER_NAME, REMOTE_REQUEST_HEADERS +from 
mkdocs_rss_plugin.constants import ( + DEFAULT_CACHE_FOLDER, + MKDOCS_LOGGER_NAME, + REMOTE_REQUEST_HEADERS, +) from mkdocs_rss_plugin.git_manager.ci import CiHandler from mkdocs_rss_plugin.integrations.theme_material_social_plugin import ( IntegrationMaterialSocialCards, @@ -67,6 +73,7 @@ class Util: def __init__( self, path: str = ".", + cache_dir: Path = DEFAULT_CACHE_FOLDER, use_git: bool = True, integration_material_social_cards: Optional[ IntegrationMaterialSocialCards @@ -122,8 +129,13 @@ def __init__( self.social_cards = integration_material_social_cards # http/s session - self.req_session = Session() - self.req_session.headers.update(REMOTE_REQUEST_HEADERS) + session = Session() + session.headers.update(REMOTE_REQUEST_HEADERS) + self.req_session = CacheControl( + sess=session, + cache=SeparateBodyFileCache(directory=cache_dir), + cacheable_methods=("GET", "HEAD"), + ) def build_url( self, base_url: str, path: str, args_dict: Optional[dict] = None diff --git a/requirements/base.txt b/requirements/base.txt index bb2decf..cb8de78 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,7 +1,7 @@ # Common requirements # ----------------------- - +cachecontrol[filecache] >=0.14,<1 GitPython>=3.1,<3.2 mkdocs>=1.5,<2 requests>=2.31,<3 From a29c541123e981e8e37691ae16667a0e249cc5fc Mon Sep 17 00:00:00 2001 From: GeoJulien Date: Tue, 25 Jun 2024 15:31:18 +0200 Subject: [PATCH 3/5] dev(script): add a minimal script to try out Caching Control --- tests/dev/dev_cached_http.py | 45 ++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 tests/dev/dev_cached_http.py diff --git a/tests/dev/dev_cached_http.py b/tests/dev/dev_cached_http.py new file mode 100644 index 0000000..e04ff31 --- /dev/null +++ b/tests/dev/dev_cached_http.py @@ -0,0 +1,45 @@ +import http.client +import logging +from pathlib import Path + +import requests +from cachecontrol import CacheControl +from cachecontrol.caches.file_cache import FileCache + 
+http.client.HTTPConnection.debuglevel = 1 +logging.basicConfig() +logging.getLogger().setLevel(logging.DEBUG) +req_log = logging.getLogger("requests.packages.urllib3") +req_log.setLevel(logging.DEBUG) +req_log.propagate = True + + +sess = CacheControl( + requests.Session(), cache=FileCache(".web_cache"), cacheable_methods=("HEAD", "GET") +) + + +# get requests +resp = sess.get("https://geotribu.fr") +resp_img = sess.get( + "https://cdn.geotribu.fr/img/articles-blog-rdp/capture-ecran/kevish_Air-Traffic.png" +) + +# try again, cache hit expected +resp = sess.get("https://geotribu.fr") +resp_img = sess.get( + "https://cdn.geotribu.fr/img/articles-blog-rdp/capture-ecran/kevish_Air-Traffic.png" +) + +# head requests +resp_img = sess.head( + "https://cdn.geotribu.fr/img/articles-blog-rdp/capture-ecran/kevish_Air-Traffic.png" +) + + +# try again, cache hit expected +resp_img = sess.head( + "https://cdn.geotribu.fr/img/articles-blog-rdp/capture-ecran/kevish_Air-Traffic.png" +) + +print(list(Path(".web_cache").iterdir())) From c883ee8bbd02023775281b4b238c5322cba07dfe Mon Sep 17 00:00:00 2001 From: GeoJulien Date: Tue, 25 Jun 2024 15:45:44 +0200 Subject: [PATCH 4/5] fix(tests): add new option to tests --- tests/test_config.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test_config.py b/tests/test_config.py index 0ee1eef..47b9f68 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -21,8 +21,10 @@ # 3rd party from mkdocs.config.base import Config -# plugin target from mkdocs_rss_plugin.config import RssPluginConfig + +# plugin target +from mkdocs_rss_plugin.constants import DEFAULT_CACHE_FOLDER from mkdocs_rss_plugin.plugin import GitRssPlugin # test suite @@ -62,6 +64,7 @@ def test_plugin_config_defaults(self): "abstract_chars_count": 160, "abstract_delimiter": "", "categories": None, + "cache_dir": f"{DEFAULT_CACHE_FOLDER.resolve()}", "comments_path": None, "date_from_meta": { "as_creation": "git", @@ -105,6 +108,7 @@ def 
test_plugin_config_image(self): expected = { "abstract_chars_count": 160, "abstract_delimiter": "", "cache_dir": f"{DEFAULT_CACHE_FOLDER.resolve()}", "categories": None, "comments_path": None, "date_from_meta": { From 96fec9f714ea420c7ffed786fdfaeb7df726ea88 Mon Sep 17 00:00:00 2001 From: GeoJulien Date: Tue, 25 Jun 2024 15:54:24 +0200 Subject: [PATCH 5/5] docs(cache_dir): add section about new cache_dir option --- docs/configuration.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/docs/configuration.md b/docs/configuration.md index afa518d..1515e59 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -195,6 +195,28 @@ Default: `` ---- +### :material-recycle: `cache_dir`: folder where to store plugin's cached files { #cache_dir } + +The plugin implements a caching mechanism, ensuring that a remote media file is only fetched once during its life-cycle on the remote HTTP server (using [Cache Control](https://pypi.org/project/CacheControl/) under the hood). It is normally not necessary to specify this setting, except when you want to change the path within your root directory where HTTP body and metadata files are cached. + +If you want to change it, use: + +``` yaml +plugins: + - rss: + cache_dir: my/custom/dir +``` + +It's strongly recommended to add the path to your `.gitignore` file in the root of your project: + +``` title=".gitignore" +.cache +``` + +Default: `.cache/plugins/rss`. + +---- + ### :material-tag-multiple: `categories`: item categories { #categories } `categories`: list of page metadata values to use as [RSS item categories](https://www.w3schools.com/xml/rss_tag_category_item.asp).