Skip to content

Commit

Permalink
Use 'deflate' for storing elements.json
Browse files Browse the repository at this point in the history
  • Loading branch information
dgtlmoon committed Nov 19, 2024
1 parent fdba6b5 commit ada1350
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 15 deletions.
23 changes: 17 additions & 6 deletions changedetectionio/flask_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -1269,12 +1269,23 @@ def static_content(group, filename):

# These files should be in our subdirectory
try:
# set nocache, set content-type
response = make_response(send_from_directory(os.path.join(datastore_o.datastore_path, filename), "elements.json"))
response.headers['Content-type'] = 'application/json'
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
response.headers['Pragma'] = 'no-cache'
response.headers['Expires'] = 0
# Set no-cache headers and the content type.
# `filename` is actually the directory UUID of the watch.
watch_directory = str(os.path.join(datastore_o.datastore_path, filename))
response = None
if os.path.isfile(os.path.join(watch_directory, "elements.deflate")):
response = make_response(send_from_directory(watch_directory, "elements.deflate"))
response.headers['Content-Type'] = 'application/json'
response.headers['Content-Encoding'] = 'deflate'
else:
logger.error(f'Request elements.deflate at "{watch_directory}" but was notfound.')
abort(404)

if response:
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
response.headers['Pragma'] = 'no-cache'
response.headers['Expires'] = "0"

return response

except FileNotFoundError:
Expand Down
9 changes: 5 additions & 4 deletions changedetectionio/model/Watch.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,16 +538,17 @@ def save_error_text(self, contents):

def save_xpath_data(self, data, as_error=False):
    """Serialise ``data`` to JSON and store it zlib-compressed in the watch data directory.

    The payload is written to ``elements-error.deflate`` when ``as_error`` is
    true, otherwise to ``elements.deflate``. Any existing file at that path is
    overwritten.
    """
    import json
    import zlib

    filename = "elements-error.deflate" if as_error else "elements.deflate"
    target_path = os.path.join(str(self.watch_data_dir), filename)

    self.ensure_data_dir_exists()

    # One binary write of the compressed payload; the `with` block closes the
    # file, so no explicit close() is needed.
    with open(target_path, 'wb') as f:
        f.write(zlib.compress(json.dumps(data).encode()))

# Save as PNG, PNG is larger but better for doing visual diff in the future
Expand Down
1 change: 1 addition & 0 deletions changedetectionio/static/js/visual-selector.js
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ $(document).ready(() => {
}).done((data) => {
$fetchingUpdateNoticeElem.html("Rendering..");
selectorData = data;

sortScrapedElementsBySize();
console.log(`Reported browser width from backend: ${data['browser_width']}`);

Expand Down
16 changes: 15 additions & 1 deletion changedetectionio/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=
def visualselector_data_is_ready(self, watch_uuid):
output_path = "{}/{}".format(self.datastore_path, watch_uuid)
screenshot_filename = "{}/last-screenshot.png".format(output_path)
elements_index_filename = "{}/elements.json".format(output_path)
elements_index_filename = "{}/elements.deflate".format(output_path)
if path.isfile(screenshot_filename) and path.isfile(elements_index_filename) :
return True

Expand Down Expand Up @@ -909,3 +909,17 @@ def update_18(self):
if self.data['watching'][uuid].get('in_stock_only'):
del (self.data['watching'][uuid]['in_stock_only'])

# Compress old elements.json to elements.deflate to save disk space; this compression is pretty fast.
def update_19(self):
    """Migrate each watch's ``elements.json`` to a zlib-compressed ``elements.deflate``.

    For every UUID key in ``self.data['watching']``, looks for
    ``<datastore_path>/<uuid>/elements.json``. When it exists, its bytes are
    compressed into ``elements.deflate`` in the same directory and the JSON
    file is removed. Watches without an ``elements.json`` are left untouched.
    """
    import zlib

    # Only the UUID keys are needed to locate each watch directory.
    for uuid in self.data['watching']:
        watch_dir = os.path.join(self.datastore_path, uuid)
        json_path = os.path.join(watch_dir, "elements.json")
        deflate_path = os.path.join(watch_dir, "elements.deflate")

        if os.path.exists(json_path):
            with open(json_path, "rb") as f_j, open(deflate_path, "wb") as f_d:
                f_d.write(zlib.compress(f_j.read()))
            # Remove the source only after both file handles are closed.
            os.unlink(json_path)

8 changes: 5 additions & 3 deletions changedetectionio/tests/visualselector/test_fetch_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,13 @@ def test_visual_selector_content_ready(client, live_server, measure_memory_usage


assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist"
assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist"
assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.deflate')), "xpath elements.deflate data should exist"

# Open it and see if it roughly looks correct
with open(os.path.join('test-datastore', uuid, 'elements.json'), 'r') as f:
json.load(f)
with open(os.path.join('test-datastore', uuid, 'elements.deflate'), 'rb') as f:
    import zlib
    decompressed_data = zlib.decompress(f.read())
    # zlib.decompress() returns bytes, so parse with json.loads();
    # json.load() expects a file-like object with .read() and would raise here.
    json.loads(decompressed_data)

# Attempt to fetch it via the web hook that the browser would use
res = client.get(url_for('static_content', group='visual_selector_data', filename=uuid))
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,4 +95,4 @@ babel
# Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
greenlet >= 3.0.3


# zlib ships with the Python standard library, so it is not listed as a pip requirement

0 comments on commit ada1350

Please sign in to comment.