From ada1350af70b53ba9b2241617eed1967fa69b4af Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Tue, 19 Nov 2024 15:30:48 +0100 Subject: [PATCH] Use 'deflate' for storing elements.json --- changedetectionio/flask_app.py | 23 ++++++++++++++----- changedetectionio/model/Watch.py | 9 ++++---- .../static/js/visual-selector.js | 1 + changedetectionio/store.py | 16 ++++++++++++- .../tests/visualselector/test_fetch_data.py | 8 ++++--- requirements.txt | 2 +- 6 files changed, 44 insertions(+), 15 deletions(-) diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py index 2f6be5c131a..6f290feb095 100644 --- a/changedetectionio/flask_app.py +++ b/changedetectionio/flask_app.py @@ -1269,12 +1269,23 @@ def static_content(group, filename): # These files should be in our subdirectory try: - # set nocache, set content-type - response = make_response(send_from_directory(os.path.join(datastore_o.datastore_path, filename), "elements.json")) - response.headers['Content-type'] = 'application/json' - response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' - response.headers['Pragma'] = 'no-cache' - response.headers['Expires'] = 0 + # set nocache, set content-type, + # `filename` is actually directory UUID of the watch + watch_directory = str(os.path.join(datastore_o.datastore_path, filename)) + response = None + if os.path.isfile(os.path.join(watch_directory, "elements.deflate")): + response = make_response(send_from_directory(watch_directory, "elements.deflate")) + response.headers['Content-Type'] = 'application/json' + response.headers['Content-Encoding'] = 'deflate' + else: + logger.error(f'Request elements.deflate at "{watch_directory}" but was not found.') + abort(404) + + if response: + response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' + response.headers['Pragma'] = 'no-cache' + response.headers['Expires'] = "0" + return response except FileNotFoundError: diff --git a/changedetectionio/model/Watch.py 
b/changedetectionio/model/Watch.py index 0898c98a2a5..b9c3d39ff36 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -538,16 +538,17 @@ def save_error_text(self, contents): def save_xpath_data(self, data, as_error=False): import json + import zlib if as_error: - target_path = os.path.join(self.watch_data_dir, "elements-error.json") + target_path = os.path.join(str(self.watch_data_dir), "elements-error.deflate") else: - target_path = os.path.join(self.watch_data_dir, "elements.json") + target_path = os.path.join(str(self.watch_data_dir), "elements.deflate") self.ensure_data_dir_exists() - with open(target_path, 'w') as f: - f.write(json.dumps(data)) + with open(target_path, 'wb') as f: + f.write(zlib.compress(json.dumps(data).encode())) f.close() # Save as PNG, PNG is larger but better for doing visual diff in the future diff --git a/changedetectionio/static/js/visual-selector.js b/changedetectionio/static/js/visual-selector.js index 7cc54e8611f..f6f8e79c2f3 100644 --- a/changedetectionio/static/js/visual-selector.js +++ b/changedetectionio/static/js/visual-selector.js @@ -132,6 +132,7 @@ $(document).ready(() => { }).done((data) => { $fetchingUpdateNoticeElem.html("Rendering.."); selectorData = data; + sortScrapedElementsBySize(); console.log(`Reported browser width from backend: ${data['browser_width']}`); diff --git a/changedetectionio/store.py b/changedetectionio/store.py index 697da5bce43..431a779b5cf 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -374,7 +374,7 @@ def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now= def visualselector_data_is_ready(self, watch_uuid): output_path = "{}/{}".format(self.datastore_path, watch_uuid) screenshot_filename = "{}/last-screenshot.png".format(output_path) - elements_index_filename = "{}/elements.json".format(output_path) + elements_index_filename = "{}/elements.deflate".format(output_path) if path.isfile(screenshot_filename) and 
path.isfile(elements_index_filename) : return True @@ -909,3 +909,17 @@ def update_18(self): if self.data['watching'][uuid].get('in_stock_only'): del (self.data['watching'][uuid]['in_stock_only']) + # Compress old elements.json to elements.deflate, saving disk, this compression is pretty fast. + def update_19(self): + import zlib + + for uuid, watch in self.data['watching'].items(): + json_path = os.path.join(self.datastore_path, uuid, "elements.json") + deflate_path = os.path.join(self.datastore_path, uuid, "elements.deflate") + + if os.path.exists(json_path): + with open(json_path, "rb") as f_j: + with open(deflate_path, "wb") as f_d: + f_d.write(zlib.compress(f_j.read())) + os.unlink(json_path) + diff --git a/changedetectionio/tests/visualselector/test_fetch_data.py b/changedetectionio/tests/visualselector/test_fetch_data.py index de3b90304de..61fce9b0f51 100644 --- a/changedetectionio/tests/visualselector/test_fetch_data.py +++ b/changedetectionio/tests/visualselector/test_fetch_data.py @@ -54,11 +54,13 @@ def test_visual_selector_content_ready(client, live_server, measure_memory_usage assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist" - assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist" + assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.deflate')), "xpath elements.deflate data should exist" # Open it and see if it roughly looks correct - with open(os.path.join('test-datastore', uuid, 'elements.json'), 'r') as f: - json.load(f) + with open(os.path.join('test-datastore', uuid, 'elements.deflate'), 'rb') as f: + import zlib + decompressed_data = zlib.decompress(f.read()) + json.loads(decompressed_data) # Attempt to fetch it via the web hook that the browser would use res = client.get(url_for('static_content', group='visual_selector_data', filename=uuid)) diff --git a/requirements.txt b/requirements.txt index 
b5d58f410f1..b483014c6bf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -95,4 +95,4 @@ babel # Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096 greenlet >= 3.0.3 - +# zlib is part of the Python standard library; it must not be listed as a pip requirement