From c3e2e2e45ffb7673a924a1e30cd5cef71967d116 Mon Sep 17 00:00:00 2001 From: blissful Date: Thu, 19 Oct 2023 08:05:10 -0400 Subject: [PATCH] reoptimize getattr call (i fucked it up last time) --- rose/cache.py | 8 +++---- rose/cache_test.py | 5 ++++- rose/collages.py | 2 +- rose/virtualfs.py | 53 ++++++++++++++++++++++++++++++++++++---------- 4 files changed, 51 insertions(+), 17 deletions(-) diff --git a/rose/cache.py b/rose/cache.py index 7411bfc..4472474 100644 --- a/rose/cache.py +++ b/rose/cache.py @@ -1203,12 +1203,12 @@ def list_collages(c: Config) -> Iterator[str]: yield row["name"] -def list_collage_releases(c: Config, collage_name: str) -> Iterator[tuple[int, str]]: - """Returns tuples of (position, release_virtual_dirname).""" +def list_collage_releases(c: Config, collage_name: str) -> Iterator[tuple[int, str, Path]]: + """Returns tuples of (position, release_virtual_dirname, release_source_path).""" with connect(c) as conn: cursor = conn.execute( """ - SELECT cr.position, r.virtual_dirname + SELECT cr.position, r.virtual_dirname, r.source_path FROM collages_releases cr JOIN releases r ON r.id = cr.release_id WHERE cr.collage_name = ? @@ -1217,7 +1217,7 @@ def list_collage_releases(c: Config, collage_name: str) -> Iterator[tuple[int, s (collage_name,), ) for row in cursor: - yield (row["position"], row["virtual_dirname"]) + yield (row["position"], row["virtual_dirname"], Path(row["source_path"])) def release_exists(c: Config, virtual_dirname: str) -> Path | None: diff --git a/rose/cache_test.py b/rose/cache_test.py index 86576ab..12c3cb2 100644 --- a/rose/cache_test.py +++ b/rose/cache_test.py @@ -521,7 +521,10 @@ def test_list_collages(config: Config) -> None: @pytest.mark.usefixtures("seeded_cache") def test_list_collage_releases(config: Config) -> None: releases = list(list_collage_releases(config, "Rose Gold")) - assert set(releases) == {(0, "r1"), (1, "r2")} + assert set(releases) == { + (0, "r1", config.music_source_dir / "r1"), + (1, "r2", config.music_source_dir / "r2"), + } releases = list(list_collage_releases(config, "Ruby Red")) assert releases == [] diff --git a/rose/collages.py b/rose/collages.py index 938ad69..f06b574 100644 --- a/rose/collages.py +++ b/rose/collages.py @@ -90,7 +90,7 @@ def dump_collages(c: Config) -> str: collage_names = list(list_collages(c)) for name in collage_names: out[name] = [] - for pos, virtual_dirname in list_collage_releases(c, name): + for pos, virtual_dirname, _ in list_collage_releases(c, name): out[name].append({"position": pos, "release": virtual_dirname}) return json.dumps(out) diff --git a/rose/virtualfs.py b/rose/virtualfs.py index ab01861..8343895 100644 --- a/rose/virtualfs.py +++ b/rose/virtualfs.py @@ -1,9 +1,11 @@ +import contextlib import errno import logging import os import re import stat import subprocess +import time from collections.abc import Iterator from dataclasses import dataclass from pathlib import Path @@ -49,20 +51,27 @@ def __init__(self, config: Config): self.hide_artists_set = set(config.fuse_hide_artists) self.hide_genres_set = set(config.fuse_hide_genres) self.hide_labels_set = set(config.fuse_hide_labels) - self.getattr_cache: dict[str, dict[str, Any]] = {} + # We cache some items for getattr for performance reasons--after a ls, getattr is serially + # called for each item in the directory, and sequential 1k SQLite reads is quite slow in any + # universe. So whenever we have a readdir, we do a batch read and populate the getattr + # cache. The getattr cache is valid for only 1 second, which prevents stale results from + # being read from it. + # + # The dict is a map of paths to (timestamp, result). The timestamp should be checked upon + # access. + self.getattr_cache: dict[str, tuple[float, dict[str, Any]]] = {} super().__init__() def getattr(self, path: str, fh: int) -> dict[str, Any]: logger.debug(f"Received getattr for {path=} {fh=}") - # We cache the getattr call with lru_cache because this is called _extremely_ often. Like - # for every node that we see in the output of `ls`. - try: - return self.getattr_cache[path] - except KeyError: - pass + with contextlib.suppress(KeyError): + ts, result = self.getattr_cache[path] + if time.time() - ts < 1.0: + logger.debug(f"Returning cached getattr result for {path=}") + return result - logger.debug(f"Recomputing uncached getattr for {path}") + logger.debug(f"Handling uncached getattr for {path=}") p = parse_virtual_path(path) logger.debug(f"Parsed getattr path as {p}") @@ -102,6 +111,9 @@ def readdir(self, path: str, _: int) -> Iterator[str]: yield from [".", ".."] + # Outside of yielding the strings, we also populate the getattr cache here. See the comment + # in __init__ for documentation. + if p.view == "Root": yield from [ "Artists", @@ -114,8 +126,16 @@ def readdir(self, path: str, _: int) -> Iterator[str]: rf = get_release_files(self.config, p.release) for track in rf.tracks: yield track.virtual_filename + self.getattr_cache[path + "/" + track.virtual_filename] = ( + time.time(), + mkstat("file", track.source_path), + ) if rf.cover: yield rf.cover.name + self.getattr_cache[path + "/" + rf.cover.name] = ( + time.time(), + mkstat("file", rf.cover), + ) elif p.artist or p.genre or p.label or p.view == "Releases": if ( (p.artist and p.artist in self.hide_artists_set) @@ -130,29 +150,40 @@ def readdir(self, path: str, _: int) -> Iterator[str]: sanitized_label_filter=p.label, ): yield release.virtual_dirname + self.getattr_cache[path + "/" + release.virtual_dirname] = ( + time.time(), + mkstat("dir", release.source_path), + ) elif p.view == "Artists": for artist in list_artists(self.config): if artist in self.hide_artists_set: continue yield sanitize_filename(artist) + self.getattr_cache[path + "/" + artist] = (time.time(), mkstat("dir")) elif p.view == "Genres": for genre in list_genres(self.config): if genre in self.hide_genres_set: continue yield sanitize_filename(genre) + self.getattr_cache[path + "/" + genre] = (time.time(), mkstat("dir")) elif p.view == "Labels": for label in list_labels(self.config): if label in self.hide_labels_set: continue yield sanitize_filename(label) + self.getattr_cache[path + "/" + label] = (time.time(), mkstat("dir")) elif p.view == "Collages" and p.collage: releases = list(list_collage_releases(self.config, p.collage)) pad_size = max(len(str(r[0])) for r in releases) - for idx, virtual_dirname in releases: - yield f"{str(idx).zfill(pad_size)}. {virtual_dirname}" + for idx, virtual_dirname, source_dir in releases: + dirname = f"{str(idx).zfill(pad_size)}. {virtual_dirname}" + yield dirname + self.getattr_cache[path + "/" + dirname] = (time.time(), mkstat("dir", source_dir)) elif p.view == "Collages": # Don't need to sanitize because the collage names come from filenames. - yield from list_collages(self.config) + for collage in list_collages(self.config): + yield collage + self.getattr_cache[path + "/" + collage] = (time.time(), mkstat("dir")) else: raise fuse.FuseOSError(errno.ENOENT)