diff --git a/flake.nix b/flake.nix index acf1bc6..f052d8e 100644 --- a/flake.nix +++ b/flake.nix @@ -15,10 +15,22 @@ let pkgs = import nixpkgs { inherit system; }; python = pkgs.python311; + uuid6-python = python.pkgs.buildPythonPackage { + pname = "uuid6-python"; + version = "2023.5.2"; + src = pkgs.fetchFromGitHub { + owner = "oittaa"; + repo = "uuid6-python"; + rev = "d65fff8bbfcd0bca78577b3d07cb3c9979cd69e7"; + hash = "sha256-Typif9Ags1Eaz2WMCh+MnsbTqJdTPgYpCCReQY8pVqI="; + }; + doCheck = false; + }; prod-deps = with python.pkgs; [ click fuse mutagen + uuid6-python yoyo-migrations ]; dev-deps = with python.pkgs; [ @@ -58,6 +70,7 @@ (python.withPackages (_: prod-deps ++ dev-deps)) ruff dev-cli + nodePackages.pyright ]; }) ]; diff --git a/migrations/20231009_01_qlEHa-bootstrap.rollback.sql b/migrations/20231009_01_qlEHa-bootstrap.rollback.sql index 77d8423..3afa2d6 100644 --- a/migrations/20231009_01_qlEHa-bootstrap.rollback.sql +++ b/migrations/20231009_01_qlEHa-bootstrap.rollback.sql @@ -7,8 +7,8 @@ DROP TABLE collections_releases; DROP TABLE collections; DROP TABLE tracks_artists; DROP TABLE releases_artists; -DROP TABLE artists; DROP TABLE artist_role_enum; DROP TABLE tracks; +DROP TABLE releases_genres; DROP TABLE releases; DROP TABLE release_type_enum; diff --git a/migrations/20231009_01_qlEHa-bootstrap.sql b/migrations/20231009_01_qlEHa-bootstrap.sql index e6aabaf..3d3469d 100644 --- a/migrations/20231009_01_qlEHa-bootstrap.sql +++ b/migrations/20231009_01_qlEHa-bootstrap.sql @@ -16,21 +16,29 @@ INSERT INTO release_type_enum (value) VALUES ('unknown'); CREATE TABLE releases ( - id INTEGER PRIMARY KEY, + id TEXT PRIMARY KEY, source_path TEXT NOT NULL UNIQUE, title TEXT NOT NULL, release_type TEXT NOT NULL REFERENCES release_type_enum(value), - release_year INTEGER + release_year INTEGER, + new BOOLEAN NOT NULL DEFAULT true ); CREATE INDEX releases_source_path ON releases(source_path); CREATE INDEX releases_release_year ON releases(release_year); +CREATE 
TABLE releases_genres ( + release_id TEXT REFERENCES releases(id) ON DELETE CASCADE, + genre TEXT, + PRIMARY KEY (release_id, genre) +); +CREATE INDEX releases_genres_genre ON releases_genres(genre); + CREATE TABLE tracks ( - id INTEGER PRIMARY KEY, + id TEXT PRIMARY KEY, source_path TEXT NOT NULL UNIQUE, source_mtime TIMESTAMP NOT NULL, title TEXT NOT NULL, - release_id INTEGER NOT NULL REFERENCES releases(id), + release_id TEXT NOT NULL REFERENCES releases(id), track_number TEXT NOT NULL, disc_number TEXT NOT NULL, duration_seconds INTEGER NOT NULL @@ -39,51 +47,44 @@ CREATE INDEX tracks_source_path ON tracks(source_path); CREATE INDEX tracks_release_id ON tracks(release_id); CREATE INDEX tracks_ordering ON tracks(release_id, disc_number, track_number); -CREATE TABLE artists ( - id INTEGER PRIMARY KEY, - name TEXT NOT NULL -); -CREATE INDEX artists_name ON artists(name); - CREATE TABLE artist_role_enum (value TEXT PRIMARY KEY); INSERT INTO artist_role_enum (value) VALUES ('main'), - ('feature'), + ('guest'), ('remixer'), ('producer'), ('composer'), - ('conductor'), ('djmixer'); CREATE TABLE releases_artists ( - release_id INTEGER REFERENCES releases(id) ON DELETE CASCADE, - artist_id INTEGER REFERENCES artists(id) ON DELETE CASCADE, + release_id TEXT REFERENCES releases(id) ON DELETE CASCADE, + artist TEXT, role TEXT REFERENCES artist_role_enum(value), - PRIMARY KEY (release_id, artist_id) + PRIMARY KEY (release_id, artist) ); CREATE INDEX releases_artists_release_id ON releases_artists(release_id); -CREATE INDEX releases_artists_artist_id ON releases_artists(artist_id); +CREATE INDEX releases_artists_artist ON releases_artists(artist); CREATE TABLE tracks_artists ( - track_id INTEGER REFERENCES tracks(id) ON DELETE CASCADE, - artist_id INTEGER REFERENCES artists(id) ON DELETE CASCADE, + track_id TEXT REFERENCES tracks(id) ON DELETE CASCADE, + artist TEXT, role TEXT REFERENCES artist_role_enum(value), - PRIMARY KEY (track_id, artist_id) + PRIMARY KEY (track_id, artist) ); CREATE INDEX 
tracks_artists_track_id ON tracks_artists(track_id); -CREATE INDEX tracks_artists_artist_id ON tracks_artists(artist_id); +CREATE INDEX tracks_artists_artist ON tracks_artists(artist); CREATE TABLE collections ( - id INTEGER PRIMARY KEY, + id TEXT PRIMARY KEY, name TEXT NOT NULL, - source_path TEXT UNIQUE NOT NULL + source_path TEXT UNIQUE NOT NULL, source_mtime TIMESTAMP NOT NULL ); CREATE INDEX collections_source_path ON collections(source_path); CREATE TABLE collections_releases ( - collection_id INTEGER REFERENCES collections(id) ON DELETE CASCADE, - release_id INTEGER REFERENCES releases(id) ON DELETE CASCADE, + collection_id TEXT REFERENCES collections(id) ON DELETE CASCADE, + release_id TEXT REFERENCES releases(id) ON DELETE CASCADE, position INTEGER NOT NULL ); CREATE INDEX collections_releases_collection_id ON collections_releases(collection_id); @@ -91,7 +92,7 @@ CREATE INDEX collections_releases_release_id ON collections_releases(release_id) CREATE UNIQUE INDEX collections_releases_collection_position ON collections_releases(collection_id, position); CREATE TABLE playlists ( - id INTEGER PRIMARY KEY, + id TEXT PRIMARY KEY, name TEXT NOT NULL, source_path TEXT UNIQUE NOT NULL, source_mtime TIMESTAMP NOT NULL @@ -99,8 +100,8 @@ CREATE TABLE playlists ( CREATE INDEX playlists_source_path ON playlists(source_path); CREATE TABLE playlists_tracks ( - playlist_id INTEGER REFERENCES playlists(id) ON DELETE CASCADE, - track_id INTEGER REFERENCES tracks(id) ON DELETE CASCADE, + playlist_id TEXT REFERENCES playlists(id) ON DELETE CASCADE, + track_id TEXT REFERENCES tracks(id) ON DELETE CASCADE, position INTEGER NOT NULL ); CREATE INDEX playlists_tracks_playlist_id ON playlists_tracks(playlist_id); diff --git a/pyproject.toml b/pyproject.toml index aaf47ba..68d4668 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,8 @@ select = [ # "PTH", ] ignore = [ + # Allow shadowing builtins on attributes. 
+ "A003", ] line-length = 100 exclude = [".venv"] diff --git a/rose/cache/process.py b/rose/cache/process.py new file mode 100644 index 0000000..fedc58e --- /dev/null +++ b/rose/cache/process.py @@ -0,0 +1,214 @@ +import os +import re +from dataclasses import asdict, dataclass +from pathlib import Path + +import uuid6 + +from rose.cache.database import connect +from rose.foundation.conf import Config +from rose.tagger import AudioFile + +SUPPORTED_EXTENSIONS = [ + ".mp3", + ".m4a", + ".ogg", + ".opus", + ".flac", +] + +SUPPORTED_RELEASE_TYPES = [ + "album", + "single", + "ep", + "compilation", + "soundtrack", + "live", + "remix", + "djmix", + "mixtape", + "other", + "unknown", +] + + +@dataclass +class CachedRelease: + id: str + source_path: Path + title: str + release_type: str + release_year: int | None + new: bool + + +@dataclass +class CachedTrack: + id: str + source_path: Path + source_mtime: int + title: str + release_id: str + trackno: str + discno: str + duration_sec: int + + +@dataclass +class CachedArtist: + id: str + name: str + + +def process_release(c: Config, release_dir: Path) -> None: + """ + Given a release's directory, update the cache entry based on the release's metadata. + If this is a new release or track, update the directory and file names to include the UUIDs. + """ + with connect(c) as conn: + # The release will be updated based on the album tags of the first track. + release: CachedRelease | None = None + # But first, parse the release_id from the directory name. If the directory name does not + # contain a release_id, generate one and rename the directory. + release_id = _parse_uuid_from_path(release_dir) + if not release_id: + release_id = str(uuid6.uuid7()) + release_dir = _rename_with_uuid(release_dir, release_id) + + for f in list(os.scandir(release_dir)): + # Skip non-music files. 
+ if not any(f.name.endswith(ext) for ext in SUPPORTED_EXTENSIONS): + continue + + tags = AudioFile.from_file(Path(f.path)) + # If this is the first track, upsert the release. + if release is None: + release = CachedRelease( + id=release_id, + source_path=release_dir, + title=tags.album or "Unknown Release", + release_type=( + tags.release_type + if tags.release_type in SUPPORTED_RELEASE_TYPES + else "unknown" + ), + release_year=tags.year, + new=True, + ) + conn.execute( + """ + INSERT INTO releases + (id, source_path, title, release_type, release_year, new) + VALUES (?, ?, ?, ?, ?, ?) + ON CONFLICT (id) DO UPDATE SET + source_path = ?, + title = ?, + release_type = ?, + release_year = ?, + new = ? + """, + ( + release.id, + str(release.source_path), + release.title, + release.release_type, + release.release_year, + release.new, + str(release.source_path), + release.title, + release.release_type, + release.release_year, + release.new, + ), + ) + for genre in tags.genre: + conn.execute( + """ + INSERT INTO releases_genres (release_id, genre) VALUES (?, ?) + ON CONFLICT (release_id, genre) DO NOTHING + """, + (release.id, genre), + ) + for role, names in asdict(tags.album_artists).items(): + for name in names: + conn.execute( + """ + INSERT INTO releases_artists (release_id, artist, role) + VALUES (?, ?, ?) + ON CONFLICT (release_id, artist) DO UPDATE SET role = ? + """, + (release.id, name, role, role), + ) + + # Now process the track. Release is guaranteed to exist here. 
+ filepath = Path(f.path) + track_id = _parse_uuid_from_path(filepath) + if not track_id: + track_id = str(uuid6.uuid7()) + filepath = _rename_with_uuid(filepath, track_id) + track = CachedTrack( + id=track_id, + source_path=filepath, + source_mtime=int(filepath.stat().st_mtime), + title=tags.title or "Unknown Title", + release_id=release.id, + trackno=tags.track_number or "1", + discno=tags.disc_number or "1", + duration_sec=tags.duration_sec, + ) + conn.execute( + """ + INSERT INTO tracks + (id, source_path, source_mtime, title, release_id, track_number, disc_number, + duration_seconds) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT (id) DO UPDATE SET + source_path = ?, + source_mtime = ?, + title = ?, + release_id = ?, + track_number = ?, + disc_number = ?, + duration_seconds = ? + """, + ( + track.id, + str(track.source_path), + track.source_mtime, + track.title, + track.release_id, + track.trackno, + track.discno, + track.duration_sec, + str(track.source_path), + track.source_mtime, + track.title, + track.release_id, + track.trackno, + track.discno, + track.duration_sec, + ), + ) + for role, names in asdict(tags.artists).items(): + for name in names: + conn.execute( + """ + INSERT INTO tracks_artists (track_id, artist, role) + VALUES (?, ?, ?) + ON CONFLICT (track_id, artist) DO UPDATE SET role = ? 
+ """, + (track.id, name, role, role), + ) + + +def _parse_uuid_from_path(path: Path) -> str | None: + if m := re.search(r"\{id=([^}]+)\}$", path.stem): + return m[1] + return None + + +def _rename_with_uuid(src: Path, uuid: str) -> Path: + new_stem = src.stem + f" {{id={uuid}}}" + dst = src.with_stem(new_stem) + src.rename(dst) + return dst diff --git a/rose/tagger/__init__.py b/rose/tagger/__init__.py index 6b5e6ad..c884143 100644 --- a/rose/tagger/__init__.py +++ b/rose/tagger/__init__.py @@ -48,6 +48,8 @@ class AudioFile: album_artists: ArtistTags artists: ArtistTags + duration_sec: int + @classmethod def from_file(cls, p: Path) -> AudioFile: return _convert_mutagen(mutagen.File(p), p) # type: ignore @@ -88,6 +90,7 @@ def _get_paired_frame(x: str) -> str | None: producer=_get_paired_frame("producer"), dj=_get_paired_frame("DJ-mix"), ), + duration_sec=round(m.info.length), ) if isinstance(m, mutagen.mp4.MP4): return AudioFile( @@ -108,6 +111,7 @@ def _get_paired_frame(x: str) -> str | None: conductor=_get_tag(m.tags, ["----:com.apple.iTunes:CONDUCTOR"]), dj=_get_tag(m.tags, ["----:com.apple.iTunes:DJMIXER"]), ), + duration_sec=round(m.info.length), # type: ignore ) if isinstance(m, (mutagen.flac.FLAC, mutagen.oggvorbis.OggVorbis, mutagen.oggopus.OggOpus)): return AudioFile( @@ -128,6 +132,7 @@ def _get_paired_frame(x: str) -> str | None: conductor=_get_tag(m.tags, ["conductor"]), dj=_get_tag(m.tags, ["djmixer"]), ), + duration_sec=round(m.info.length), # type: ignore ) raise UnsupportedFiletypeError(f"{p} is not a supported audio file.") diff --git a/rose/tagger/__test__.py b/rose/tagger/__test__.py index ca5f9f1..72d7b17 100644 --- a/rose/tagger/__test__.py +++ b/rose/tagger/__test__.py @@ -8,16 +8,16 @@ @pytest.mark.parametrize( - ("filepath", "track_num"), + ("filepath", "track_num", "duration"), [ - ("track1.flac", "1"), - ("track2.m4a", "2"), - ("track3.mp3", "3"), - ("track4.vorbis.ogg", "4"), - ("track5.opus.ogg", "5"), + ("track1.flac", "1", 2), + 
("track2.m4a", "2", 2), + ("track3.mp3", "3", 1), + ("track4.vorbis.ogg", "4", 1), + ("track5.opus.ogg", "5", 1), ], ) -def test_getters(filepath: str, track_num: str) -> None: +def test_getters(filepath: str, track_num: str, duration: int) -> None: tf = AudioFile.from_file(FAKE_ALBUM_DIR / filepath) assert tf.track_number == track_num assert tf.title == f"Track {track_num}" @@ -38,6 +38,7 @@ def test_getters(filepath: str, track_num: str) -> None: composer=["Artist EF", "Artist FG"], djmixer=["Artist IJ", "Artist JK"], ) + assert tf.duration_sec == duration def test_split_tag() -> None: diff --git a/setup.py b/setup.py index 1252648..2c39d2e 100644 --- a/setup.py +++ b/setup.py @@ -13,6 +13,7 @@ "click", "fuse-python", "mutagen", + "uuid6-python", "yoyo-migrations", ], )