def playlist_lock_name(playlist_name: str) -> str:
    """Return the lock name used for the playlist called `playlist_name`."""
    return "playlist-{}".format(playlist_name)
def update_cache_for_playlists(
    c: Config,
    # Leave as None to update all playlists.
    playlist_names: list[str] | None = None,
    force: bool = False,
) -> None:
    """
    Update the read cache to match the data for all stored playlists.

    Playlists are the `.toml` files directly inside `<music_source_dir>/!playlists` (the directory
    is created if it is missing). Pass `playlist_names` to restrict the update to those playlists
    and `force` to re-read files even when their mtime is unchanged.

    This is performance-optimized in a similar way to the update releases function. We:

    1. Execute one big SQL query at the start to fetch the relevant previous caches.
    2. Skip reading a file's data if the mtime has not changed since the previous cache update.

    A playlist that is re-read always has its cache rows rewritten. We do not batch writes to the
    end of the function, nor do we process the playlists in parallel. This is because we should
    have far fewer playlists than releases.

    Track IDs that are not present in the tracks table are left out of the cache and are also
    removed from the playlist file on disk.
    """
    playlist_dir = c.music_source_dir / "!playlists"
    playlist_dir.mkdir(exist_ok=True)

    files: list[tuple[Path, str, os.DirEntry[str]]] = []
    with os.scandir(str(playlist_dir)) as entries:
        for f in entries:
            path = Path(f.path)
            if path.suffix != ".toml":
                continue
            if not path.is_file():
                logger.debug(f"Skipping processing playlist {path.name} because it is not a file")
                continue
            if playlist_names is None or path.stem in playlist_names:
                files.append((path.resolve(), path.stem, f))
    logger.info(f"Refreshing the read cache for {len(files)} playlists")

    cached_playlists: dict[str, CachedPlaylist] = {}
    with connect(c) as conn:
        cursor = conn.execute(
            r"""
            SELECT
                p.name
              , p.source_mtime
              , COALESCE(GROUP_CONCAT(pt.track_id, ' \\ '), '') AS track_ids
            FROM playlists p
            LEFT JOIN playlists_tracks pt ON pt.playlist_name = p.name
            GROUP BY p.name
            """,
        )
        for row in cursor:
            cached_playlists[row["name"]] = CachedPlaylist(
                name=row["name"],
                source_mtime=row["source_mtime"],
                track_ids=row["track_ids"].split(r" \\ ") if row["track_ids"] else [],
            )

        # We want to validate that all track IDs exist before we write them. In order to do that,
        # we need to know which tracks exist.
        cursor = conn.execute("SELECT id FROM tracks")
        existing_track_ids = {row["id"] for row in cursor}

    loop_start = time.time()
    with connect(c) as conn:
        for source_path, name, f in files:
            try:
                cached_playlist = cached_playlists[name]
            except KeyError:
                logger.debug(f"First-time unidentified playlist found at {source_path}")
                cached_playlist = CachedPlaylist(
                    name=name,
                    source_mtime="",
                    track_ids=[],
                )

            source_mtime = str(f.stat().st_mtime)
            if source_mtime == cached_playlist.source_mtime and not force:
                logger.debug(f"playlist cache hit (mtime) for {source_path}, reusing cached data")
                continue

            logger.debug(f"playlist cache miss (mtime) for {source_path}, reading data from disk")
            cached_playlist.source_mtime = source_mtime

            with source_path.open("rb") as fp:
                diskdata = tomllib.load(fp)

            # Only cache the listed tracks that exist. If any listed track does not exist, the
            # playlist file is cleaned up after the cache has been written.
            listed_track_ids = [trk["uuid"] for trk in diskdata.get("tracks", [])]
            cached_playlist.track_ids = [t for t in listed_track_ids if t in existing_track_ids]
            has_nonexistent_tracks = len(cached_playlist.track_ids) != len(listed_track_ids)
            logger.debug(f"Found {len(cached_playlist.track_ids)} track(s) in {source_path}")

            logger.info(f"Applying cache updates for playlist {cached_playlist.name}")
            conn.execute(
                """
                INSERT INTO playlists (name, source_mtime) VALUES (?, ?)
                ON CONFLICT (name) DO UPDATE SET source_mtime = excluded.source_mtime
                """,
                (cached_playlist.name, cached_playlist.source_mtime),
            )
            # Replace the playlist's track rows wholesale; positions are 1-indexed.
            conn.execute(
                "DELETE FROM playlists_tracks WHERE playlist_name = ?",
                (cached_playlist.name,),
            )
            args: list[Any] = []
            for position, rid in enumerate(cached_playlist.track_ids):
                args.extend([cached_playlist.name, rid, position + 1])
            if args:
                conn.execute(
                    f"""
                    INSERT INTO playlists_tracks (playlist_name, track_id, position)
                    VALUES {','.join(['(?, ?, ?)'] * len(cached_playlist.track_ids))}
                    """,
                    args,
                )

            if has_nonexistent_tracks:
                # The stored source_mtime predates this rewrite, so the next cache update will
                # see a different mtime and re-read the cleaned-up file.
                _remove_nonexistent_tracks_from_playlist_file(
                    c, name, source_path, existing_track_ids
                )

    logger.debug(f"playlist update loop time {time.time() - loop_start=}")


def _remove_nonexistent_tracks_from_playlist_file(
    c: Config,
    playlist_name: str,
    source_path: Path,
    existing_track_ids: set[str],
) -> None:
    """
    Rewrite the playlist file at `source_path` without the tracks that are not in
    `existing_track_ids`.

    The file is re-read while holding the playlist lock, and the tracks to drop are decided from
    that fresh read (by track ID) rather than from positions computed before the lock was taken,
    so a file that changed in between cannot lose the wrong entries. All other top-level keys of
    the file are written back untouched. Nothing is written if the fresh read contains no
    nonexistent tracks.
    """
    with lock(c, playlist_lock_name(playlist_name)):
        # Re-read disk data here in case it changed. Super rare case, but better to be
        # correct than suffer from niche unexplainable bugs.
        with source_path.open("rb") as fp:
            diskdata = tomllib.load(fp)
        kept_tracks: list[dict[str, Any]] = []
        removed_tracks: list[str] = []
        for trk in diskdata.get("tracks", []):
            if trk["uuid"] in existing_track_ids:
                kept_tracks.append(trk)
            else:
                # The description is only used for logging; fall back to the ID if it is absent.
                removed_tracks.append(str(trk.get("description_meta", trk["uuid"])))
        if not removed_tracks:
            return
        diskdata["tracks"] = kept_tracks
        with source_path.open("wb") as fp:
            tomli_w.dump(diskdata, fp)
        logger.info(
            f"Removing nonexistent tracks from playlist {playlist_name}: "
            f"{','.join(removed_tracks)}"
        )


def update_cache_evict_nonexistent_playlists(c: Config) -> None:
    """
    Delete from the read cache every playlist that has no `.toml` file in
    `<music_source_dir>/!playlists`.

    The playlists directory is created if it is missing, matching `update_cache_for_playlists`,
    so that calling this function on its own does not fail on a fresh source directory.
    """
    logger.info("Evicting cached playlists that are not on disk")
    playlist_dir = c.music_source_dir / "!playlists"
    playlist_dir.mkdir(exist_ok=True)

    playlist_names: list[str] = []
    with os.scandir(playlist_dir) as entries:
        for f in entries:
            p = Path(f.path)
            if p.is_file() and p.suffix == ".toml":
                playlist_names.append(p.stem)

    with connect(c) as conn:
        # One placeholder per on-disk playlist. With no playlists on disk the list is empty and
        # the statement is `NOT IN ()`.
        cursor = conn.execute(
            f"""
            DELETE FROM playlists
            WHERE name NOT IN ({",".join(["?"] * len(playlist_names))})
            RETURNING name
            """,
            playlist_names,
        )
        for row in cursor:
            logger.info(f"Evicted playlist {row['name']} from cache")
def list_playlists(c: Config) -> Iterator[str]:
    """Yield the distinct playlist names stored in the read cache."""
    with connect(c) as conn:
        rows = conn.execute("SELECT DISTINCT name FROM playlists")
        yield from (r["name"] for r in rows)


def list_playlist_tracks(c: Config, playlist_name: str) -> Iterator[tuple[int, str, Path]]:
    """
    Yield a (position, track_virtual_filename, track_source_path) tuple for each track of the
    playlist `playlist_name`, ordered by position.

    Playlist entries are joined against the tracks table, so an entry without a matching track
    row yields nothing.
    """
    query = """
        SELECT pt.position, t.virtual_filename, t.source_path
        FROM playlists_tracks pt
        JOIN tracks t ON t.id = pt.track_id
        WHERE pt.playlist_name = ?
        ORDER BY pt.position
    """
    with connect(c) as conn:
        for r in conn.execute(query, (playlist_name,)):
            source_path = Path(r["source_path"])
            yield (r["position"], r["virtual_filename"], source_path)


def playlist_exists(c: Config, name: str) -> bool:
    """Return whether a playlist called `name` is present in the read cache."""
    with connect(c) as conn:
        result = conn.execute(
            "SELECT EXISTS(SELECT * FROM playlists WHERE name = ?)",
            (name,),
        ).fetchone()
        return bool(result[0])
def test_update_cache_playlists(config: Config) -> None:
    """A playlist file is cached with its mtime and its tracks in 1-indexed file order."""
    source_dir = config.music_source_dir
    shutil.copytree(TEST_RELEASE_2, source_dir / TEST_RELEASE_2.name)
    shutil.copytree(TEST_PLAYLIST_1, source_dir / "!playlists")
    update_cache(config)

    with connect(config) as conn:
        # The playlist metadata was read correctly.
        playlists = conn.execute("SELECT name, source_mtime FROM playlists").fetchall()
        assert len(playlists) == 1
        playlist = playlists[0]
        assert playlist["name"] == "Lala Lisa"
        assert playlist["source_mtime"]

        # The playlist's tracks were stored in order.
        track_rows = conn.execute(
            "SELECT playlist_name, track_id, position FROM playlists_tracks ORDER BY position"
        )
        expected = [
            {"playlist_name": "Lala Lisa", "track_id": "iloveloona", "position": 1},
            {"playlist_name": "Lala Lisa", "track_id": "ilovetwice", "position": 2},
        ]
        assert [dict(r) for r in track_rows] == expected


def test_update_cache_playlists_nonexistent_track_id(config: Config) -> None:
    """Tracks missing from the cache are neither stored nor kept in the playlist file."""
    shutil.copytree(TEST_PLAYLIST_1, config.music_source_dir / "!playlists")
    update_cache(config)

    with connect(config) as conn:
        # The playlist itself is cached...
        name_row = conn.execute("SELECT name FROM playlists").fetchone()
        assert name_row["name"] == "Lala Lisa"

        # ...but a nonexistent track was not read.
        track_rows = conn.execute(
            "SELECT playlist_name, track_id, position FROM playlists_tracks"
        ).fetchall()
        assert not track_rows

    # The source file was updated to remove the track.
    playlist_path = config.music_source_dir / "!playlists" / "Lala Lisa.toml"
    with playlist_path.open("rb") as fp:
        data = tomllib.load(fp)
    assert data["tracks"] == []


@pytest.mark.usefixtures("seeded_cache")
def test_list_playlists(config: Config) -> None:
    """Every seeded playlist name is listed."""
    names = set(list_playlists(config))
    assert names == {"Lala Lisa", "Turtle Rabbit"}


@pytest.mark.usefixtures("seeded_cache")
def test_list_playlist_tracks(config: Config) -> None:
    """Seeded playlist tracks are returned with position, virtual filename and source path."""
    source_dir = config.music_source_dir
    lala_lisa = set(list_playlist_tracks(config, "Lala Lisa"))
    assert lala_lisa == {
        (0, "01.m4a", source_dir / "r1" / "01.m4a"),
        (1, "01.m4a", source_dir / "r2" / "01.m4a"),
    }
    turtle_rabbit = list(list_playlist_tracks(config, "Turtle Rabbit"))
    assert turtle_rabbit == []


@pytest.mark.usefixtures("seeded_cache")
def test_playlist_exists(config: Config) -> None:
    """A seeded playlist exists; an unknown name does not."""
    assert playlist_exists(config, "Lala Lisa")
    assert not playlist_exists(config, "lalala")
test_edit_release(monkeypatch: Any, config: Config, source_dir: Path) -> Non id=track_ids[0], source_path=release_path / "01.m4a", source_mtime=tracks[0].source_mtime, - virtual_filename="01. I Do Like That.m4a", + virtual_filename="01. BLACKPINK - I Do Like That.m4a", title="I Do Like That", release_id=release_id, track_number="1", @@ -151,7 +151,7 @@ def test_edit_release(monkeypatch: Any, config: Config, source_dir: Path) -> Non id=track_ids[1], source_path=release_path / "02.m4a", source_mtime=tracks[1].source_mtime, - virtual_filename="02. All Eyes On Me.m4a", + virtual_filename="02. JISOO - All Eyes On Me.m4a", title="All Eyes On Me", release_id=release_id, track_number="2", diff --git a/rose/virtualfs.py b/rose/virtualfs.py index c0d5869..00c9ecd 100644 --- a/rose/virtualfs.py +++ b/rose/virtualfs.py @@ -16,7 +16,6 @@ from rose.cache import ( artist_exists, collage_exists, - collage_has_release, cover_exists, genre_exists, get_release, @@ -120,8 +119,12 @@ def getattr(self, path: str, fh: int) -> dict[str, Any]: logger.debug(f"Parsed getattr path as {p}") # Some early guards just in case. - if p.release and p.collage and not collage_has_release(self.config, p.collage, p.release): - raise fuse.FuseOSError(errno.ENOENT) + if p.release and p.collage: + for _, virtual_dirname, _ in list_collage_releases(self.config, p.collage): + if virtual_dirname == p.release: + break + else: + raise fuse.FuseOSError(errno.ENOENT) if p.release and p.file: if tp := track_exists(self.config, p.release, p.file):