Commit

add playlist reading to cache
azuline committed Oct 25, 2023
1 parent b3d70de commit a58578d
Showing 4 changed files with 276 additions and 24 deletions.
217 changes: 199 additions & 18 deletions rose/cache.py
@@ -125,6 +125,10 @@ def collage_lock_name(collage_name: str) -> str:
    return f"collage-{collage_name}"


def playlist_lock_name(playlist_name: str) -> str:
    return f"playlist-{playlist_name}"


@dataclass
class CachedArtist:
    name: str
@@ -221,6 +225,8 @@ def update_cache(c: Config, force: bool = False) -> None:
    update_cache_evict_nonexistent_releases(c)
    update_cache_for_collages(c, None, force)
    update_cache_evict_nonexistent_collages(c)
+   update_cache_for_playlists(c, None, force)
+   update_cache_evict_nonexistent_playlists(c)


def update_cache_evict_nonexistent_releases(c: Config) -> None:
@@ -797,15 +803,8 @@ def _update_cache_for_releases_executor(
                virtual_filename += f"{t.disc_number:0>2}-"
            if t.track_number:
                virtual_filename += f"{t.track_number:0>2}. "
+           virtual_filename += f"{t.formatted_artists} - "
            virtual_filename += t.title or "Unknown Title"
-           if release.releasetype in [
-               "compilation",
-               "soundtrack",
-               "remix",
-               "djmix",
-               "mixtape",
-           ]:
-               virtual_filename += f" (by {t.formatted_artists})"
            virtual_filename += t.source_path.suffix
            virtual_filename = _sanitize_filename(virtual_filename)
            # And in case of a name collision, add an extra number at the end. Iterate to find
@@ -1034,11 +1033,14 @@ def update_cache_for_collages(
"""
Update the read cache to match the data for all stored collages.
This is performance-optimized in the same way as the update releases function. We:
This is performance-optimized in a similar way to the update releases function. We:
1. Execute one big SQL query at the start to fetch the relevant previous caches.
2. Skip reading a file's data if the mtime has not changed since the previous cache update.
3. Only execute a SQLite upsert if the read data differ from the previous caches.
However, we do not batch writes to the end of the function, nor do we process the collages in
parallel. This is because we should have far fewer collages than releases.
"""
collage_dir = c.music_source_dir / "!collages"
collage_dir.mkdir(exist_ok=True)
@@ -1183,6 +1185,167 @@ def update_cache_evict_nonexistent_collages(c: Config) -> None:
logger.info(f"Evicted collage {row['name']} from cache")


def update_cache_for_playlists(
    c: Config,
    # Leave as None to update all playlists.
    playlist_names: list[str] | None = None,
    force: bool = False,
) -> None:
    """
    Update the read cache to match the data for all stored playlists.
    This is performance-optimized in a similar way to the update releases function. We:
    1. Execute one big SQL query at the start to fetch the relevant previous caches.
    2. Skip reading a file's data if the mtime has not changed since the previous cache update.
    3. Only execute a SQLite upsert if the read data differ from the previous caches.
    However, we do not batch writes to the end of the function, nor do we process the playlists in
    parallel. This is because we should have far fewer playlists than releases.
    """
    playlist_dir = c.music_source_dir / "!playlists"
    playlist_dir.mkdir(exist_ok=True)

    files: list[tuple[Path, str, os.DirEntry[str]]] = []
    for f in os.scandir(str(playlist_dir)):
        path = Path(f.path)
        if path.suffix != ".toml":
            continue
        if not path.is_file():
            logger.debug(f"Skipping processing playlist {path.name} because it is not a file")
            continue
        if playlist_names is None or path.stem in playlist_names:
            files.append((path.resolve(), path.stem, f))
    logger.info(f"Refreshing the read cache for {len(files)} playlists")

    cached_playlists: dict[str, CachedPlaylist] = {}
    with connect(c) as conn:
        cursor = conn.execute(
            r"""
            SELECT
                p.name
              , p.source_mtime
              , COALESCE(GROUP_CONCAT(pt.track_id, ' \\ '), '') AS track_ids
            FROM playlists p
            LEFT JOIN playlists_tracks pt ON pt.playlist_name = p.name
            GROUP BY p.name
            """,
        )
        for row in cursor:
            cached_playlists[row["name"]] = CachedPlaylist(
                name=row["name"],
                source_mtime=row["source_mtime"],
                track_ids=row["track_ids"].split(r" \\ ") if row["track_ids"] else [],
            )

        # We want to validate that all track IDs exist before we write them. In order to do that,
        # we need to know which tracks exist.
        cursor = conn.execute("SELECT id FROM tracks")
        existing_track_ids = {row["id"] for row in cursor}

    loop_start = time.time()
    with connect(c) as conn:
        for source_path, name, f in files:
            try:
                cached_playlist = cached_playlists[name]
            except KeyError:
                logger.debug(f"First-time unidentified playlist found at {source_path}")
                cached_playlist = CachedPlaylist(
                    name=name,
                    source_mtime="",
                    track_ids=[],
                )

            source_mtime = str(f.stat().st_mtime)
            if source_mtime == cached_playlist.source_mtime and not force:
                logger.debug(f"playlist cache hit (mtime) for {source_path}, reusing cached data")
                continue

            logger.debug(f"playlist cache miss (mtime) for {source_path}, reading data from disk")
            cached_playlist.source_mtime = source_mtime

            with source_path.open("rb") as fp:
                diskdata = tomllib.load(fp)

            # Track the listed tracks that no longer exist. Remove them from the playlist file
            # after.
            cached_playlist.track_ids = []
            nonexistent_track_idxs: list[int] = []
            for idx, trk in enumerate(diskdata.get("tracks", [])):
                if trk["uuid"] not in existing_track_ids:
                    nonexistent_track_idxs.append(idx)
                    continue
                cached_playlist.track_ids.append(trk["uuid"])
            logger.debug(f"Found {len(cached_playlist.track_ids)} track(s) in {source_path}")

            logger.info(f"Applying cache updates for playlist {cached_playlist.name}")
            conn.execute(
                """
                INSERT INTO playlists (name, source_mtime) VALUES (?, ?)
                ON CONFLICT (name) DO UPDATE SET source_mtime = excluded.source_mtime
                """,
                (cached_playlist.name, cached_playlist.source_mtime),
            )
            conn.execute(
                "DELETE FROM playlists_tracks WHERE playlist_name = ?",
                (cached_playlist.name,),
            )
            args: list[Any] = []
            for position, rid in enumerate(cached_playlist.track_ids):
                args.extend([cached_playlist.name, rid, position + 1])
            if args:
                conn.execute(
                    f"""
                    INSERT INTO playlists_tracks (playlist_name, track_id, position)
                    VALUES {','.join(['(?, ?, ?)'] * len(cached_playlist.track_ids))}
                    """,
                    args,
                )

            if nonexistent_track_idxs:
                new_diskdata_tracks: list[dict[str, str]] = []
                removed_tracks: list[str] = []
                with lock(c, playlist_lock_name(name)):
                    # Re-read disk data here in case it changed. Super rare case, but better to be
                    # correct than suffer from niche unexplainable bugs.
                    with source_path.open("rb") as fp:
                        diskdata = tomllib.load(fp)
                    for idx, trk in enumerate(diskdata.get("tracks", [])):
                        if idx in nonexistent_track_idxs:
                            removed_tracks.append(trk["description_meta"])
                            continue
                        new_diskdata_tracks.append(trk)
                    with source_path.open("wb") as fp:
                        tomli_w.dump({"tracks": new_diskdata_tracks}, fp)
                logger.info(
                    f"Removing nonexistent tracks from playlist {cached_playlist.name}: "
                    f"{','.join(removed_tracks)}"
                )

    logger.debug(f"playlist update loop time {time.time() - loop_start=}")


def update_cache_evict_nonexistent_playlists(c: Config) -> None:
    logger.info("Evicting cached playlists that are not on disk")
    playlist_names: list[str] = []
    for f in os.scandir(c.music_source_dir / "!playlists"):
        p = Path(f.path)
        if p.is_file() and p.suffix == ".toml":
            playlist_names.append(p.stem)

    with connect(c) as conn:
        cursor = conn.execute(
            f"""
            DELETE FROM playlists
            WHERE name NOT IN ({",".join(["?"] * len(playlist_names))})
            RETURNING name
            """,
            playlist_names,
        )
        for row in cursor:
            logger.info(f"Evicted playlist {row['name']} from cache")


def list_releases(
    c: Config,
    sanitized_artist_filter: str | None = None,
@@ -1488,6 +1651,30 @@ def list_labels(c: Config) -> Iterator[tuple[str, str]]:
yield row["label"], row["label_sanitized"]


def list_playlists(c: Config) -> Iterator[str]:
    with connect(c) as conn:
        cursor = conn.execute("SELECT DISTINCT name FROM playlists")
        for row in cursor:
            yield row["name"]


def list_playlist_tracks(c: Config, playlist_name: str) -> Iterator[tuple[int, str, Path]]:
    """Returns tuples of (position, track_virtual_filename, track_source_path)."""
    with connect(c) as conn:
        cursor = conn.execute(
            """
            SELECT pt.position, t.virtual_filename, t.source_path
            FROM playlists_tracks pt
            JOIN tracks t ON t.id = pt.track_id
            WHERE pt.playlist_name = ?
            ORDER BY pt.position
            """,
            (playlist_name,),
        )
        for row in cursor:
            yield (row["position"], row["virtual_filename"], Path(row["source_path"]))


def list_collages(c: Config) -> Iterator[str]:
    with connect(c) as conn:
        cursor = conn.execute("SELECT DISTINCT name FROM collages")
@@ -1595,17 +1782,11 @@ def collage_exists(c: Config, name: str) -> bool:
    return bool(cursor.fetchone()[0])


-def collage_has_release(c: Config, collage_name: str, release_virtual_dirname: str) -> bool:
+def playlist_exists(c: Config, name: str) -> bool:
    with connect(c) as conn:
        cursor = conn.execute(
-           """
-           SELECT EXISTS(
-               SELECT *
-               FROM collages_releases cr JOIN releases r ON r.id = cr.release_id
-               WHERE cr.collage_name = ? AND r.virtual_dirname = ?
-           )
-           """,
-           (collage_name, release_virtual_dirname),
+           "SELECT EXISTS(SELECT * FROM playlists WHERE name = ?)",
+           (name,),
        )
        return bool(cursor.fetchone()[0])
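
One detail of update_cache_for_playlists worth calling out: when it rewrites a playlist file to drop nonexistent tracks, it re-reads the TOML under a lock, so an edit made between the first read and the rewrite is not clobbered. A condensed sketch of that read-modify-write (an editorial illustration, assuming the lock()/playlist_lock_name() helpers and the tomllib/tomli_w imports used above):

def prune_playlist_file(c: Config, source_path: Path, name: str, existing_track_ids: set[str]) -> None:
    with lock(c, playlist_lock_name(name)):
        # Re-read inside the critical section; the data may have changed.
        with source_path.open("rb") as fp:
            diskdata = tomllib.load(fp)
        kept = [t for t in diskdata.get("tracks", []) if t["uuid"] in existing_track_ids]
        with source_path.open("wb") as fp:
            tomli_w.dump({"tracks": kept}, fp)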

70 changes: 69 additions & 1 deletion rose/cache_test.py
@@ -7,7 +7,7 @@
import pytest
import tomllib

-from conftest import TEST_COLLAGE_1, TEST_RELEASE_1, TEST_RELEASE_2
+from conftest import TEST_COLLAGE_1, TEST_PLAYLIST_1, TEST_RELEASE_1, TEST_RELEASE_2
from rose.cache import (
    CACHE_SCHEMA_PATH,
    STORED_DATA_FILE_REGEX,
@@ -29,9 +29,12 @@
    list_collages,
    list_genres,
    list_labels,
+   list_playlist_tracks,
+   list_playlists,
    list_releases,
    lock,
    migrate_database,
+   playlist_exists,
    release_exists,
    track_exists,
    update_cache,
@@ -555,6 +558,48 @@ def test_update_cache_collages_nonexistent_release_id(config: Config) -> None:
assert data["releases"] == []


def test_update_cache_playlists(config: Config) -> None:
    shutil.copytree(TEST_RELEASE_2, config.music_source_dir / TEST_RELEASE_2.name)
    shutil.copytree(TEST_PLAYLIST_1, config.music_source_dir / "!playlists")
    update_cache(config)

    # Assert that the playlist metadata was read correctly.
    with connect(config) as conn:
        cursor = conn.execute("SELECT name, source_mtime FROM playlists")
        rows = cursor.fetchall()
        assert len(rows) == 1
        row = rows[0]
        assert row["name"] == "Lala Lisa"
        assert row["source_mtime"]

        cursor = conn.execute(
            "SELECT playlist_name, track_id, position FROM playlists_tracks ORDER BY position"
        )
        assert [dict(r) for r in cursor] == [
            {"playlist_name": "Lala Lisa", "track_id": "iloveloona", "position": 1},
            {"playlist_name": "Lala Lisa", "track_id": "ilovetwice", "position": 2},
        ]


def test_update_cache_playlists_nonexistent_track_id(config: Config) -> None:
    shutil.copytree(TEST_PLAYLIST_1, config.music_source_dir / "!playlists")
    update_cache(config)

    # Assert that a nonexistent track was not read.
    with connect(config) as conn:
        cursor = conn.execute("SELECT name FROM playlists")
        assert cursor.fetchone()["name"] == "Lala Lisa"

        cursor = conn.execute("SELECT playlist_name, track_id, position FROM playlists_tracks")
        rows = cursor.fetchall()
        assert not rows

    # Assert that the source file was updated to remove the track.
    with (config.music_source_dir / "!playlists" / "Lala Lisa.toml").open("rb") as fp:
        data = tomllib.load(fp)
    assert data["tracks"] == []


@pytest.mark.usefixtures("seeded_cache")
def test_list_releases(config: Config) -> None:
    releases = list(list_releases(config))
@@ -810,6 +855,23 @@ def test_list_collage_releases(config: Config) -> None:
    assert releases == []


@pytest.mark.usefixtures("seeded_cache")
def test_list_playlists(config: Config) -> None:
playlists = list(list_playlists(config))
assert set(playlists) == {"Lala Lisa", "Turtle Rabbit"}


@pytest.mark.usefixtures("seeded_cache")
def test_list_playlist_tracks(config: Config) -> None:
tracks = list(list_playlist_tracks(config, "Lala Lisa"))
assert set(tracks) == {
(0, "01.m4a", config.music_source_dir / "r1" / "01.m4a"),
(1, "01.m4a", config.music_source_dir / "r2" / "01.m4a"),
}
tracks = list(list_playlist_tracks(config, "Turtle Rabbit"))
assert tracks == []


@pytest.mark.usefixtures("seeded_cache")
def test_release_exists(config: Config) -> None:
    assert release_exists(config, "r1")
@@ -852,3 +914,9 @@ def test_label_exists(config: Config) -> None:
def test_collage_exists(config: Config) -> None:
    assert collage_exists(config, "Rose Gold")
    assert not collage_exists(config, "lalala")


@pytest.mark.usefixtures("seeded_cache")
def test_playlist_exists(config: Config) -> None:
    assert playlist_exists(config, "Lala Lisa")
    assert not playlist_exists(config, "lalala")
4 changes: 2 additions & 2 deletions rose/releases_test.py
@@ -136,7 +136,7 @@ def test_edit_release(monkeypatch: Any, config: Config, source_dir: Path) -> None:
        id=track_ids[0],
        source_path=release_path / "01.m4a",
        source_mtime=tracks[0].source_mtime,
-       virtual_filename="01. I Do Like That.m4a",
+       virtual_filename="01. BLACKPINK - I Do Like That.m4a",
        title="I Do Like That",
        release_id=release_id,
        track_number="1",
Expand All @@ -151,7 +151,7 @@ def test_edit_release(monkeypatch: Any, config: Config, source_dir: Path) -> Non
        id=track_ids[1],
        source_path=release_path / "02.m4a",
        source_mtime=tracks[1].source_mtime,
-       virtual_filename="02. All Eyes On Me.m4a",
+       virtual_filename="02. JISOO - All Eyes On Me.m4a",
        title="All Eyes On Me",
        release_id=release_id,
        track_number="2",
