diff --git a/rose/cache.py b/rose/cache.py index 6d51b00..74046b5 100644 --- a/rose/cache.py +++ b/rose/cache.py @@ -224,7 +224,8 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None: With these optimizations, we make a lot of readdir and stat calls, but minimize file and database accesses to solely the files that have updated since the last cache run. """ - logger.info(f"Refreshing cached data for {', '.join([r.name for r in release_dirs])}") + logger.info(f"Refreshing cached data for {len(release_dirs)}") + logger.debug(f"Refreshing cached data for {', '.join([r.name for r in release_dirs])}") # First, call readdir on every release directory. We store the results in a map of # Path Basename -> (Release ID if exists, File DirEntries). @@ -236,8 +237,7 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None: for f in os.scandir(str(rd)): if m := STORED_DATA_FILE_REGEX.match(f.name): release_id = m[1] - elif any(f.name.endswith(ext) for ext in SUPPORTED_EXTENSIONS): - files.append(f) + files.append(f) dir_tree.append((rd.resolve(), release_id, files)) if release_id is not None: release_uuids.append(release_id) @@ -291,7 +291,7 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None: LEFT JOIN genres g ON g.release_id = r.id LEFT JOIN labels l ON l.release_id = r.id LEFT JOIN artists a ON a.release_id = r.id - WHERE r.id IN ({','.join(['?'*len(release_uuids)])}) + WHERE r.id IN ({','.join(['?']*len(release_uuids))}) """, release_uuids, ) @@ -349,7 +349,7 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None: FROM tracks t JOIN releases r ON r.id = t.release_id LEFT JOIN artists a ON a.track_id = t.id - WHERE r.id IN ({','.join(['?'*len(release_uuids)])}) + WHERE r.id IN ({','.join(['?']*len(release_uuids))}) """, release_uuids, ) @@ -378,10 +378,7 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None: # Now iterate over all releases in the source directory. Leverage mtime from stat to determine # whether to even check the file tags or not. Only perform database updates if necessary. for source_path, preexisting_release_id, files in dir_tree: - logger.debug( - f"Processing release {source_path} with {len(files)} " - f"files and preexisting id {preexisting_release_id}" - ) + logger.info(f"Updating release {source_path.name}") # Check to see if we should even process the directory. If the directory does not have any # tracks, skip it. And if it does not have any tracks, but is in the cache, remove it from # the cache. @@ -495,7 +492,7 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None: # multidisc after having all the track metadata. So we do virtual_dirname calculation in a # follow-up loop. tracks: list[CachedTrack] = [] - track_ids_to_upsert: set[str] = set() + track_ids_to_insert: set[str] = set() # This value is set to true if we read an AudioFile and used it to confirm the release tags. # If this value is false after the following loop, we will use the cached values instead. pulled_release_tags = False @@ -624,7 +621,7 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None: for role, names in asdict(tags.artists).items(): for name in names: track.artists.append(CachedArtist(name=name, role=role)) - track_ids_to_upsert.add(track.id) + track_ids_to_insert.add(track.id) # Now calculate whether this release is multidisc, and then assign virtual_filenames for # each track that lacks one. @@ -657,8 +654,11 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None: virtual_filename = f"{original_virtual_filename} [{collision_no}]" seen_track_names.add(virtual_filename) if virtual_filename != t.virtual_filename: + logger.debug( + f"Track virtual filename change detected for {t.source_path}, updating" + ) tracks[i].virtual_filename = virtual_filename - track_ids_to_upsert.add(t.id) + track_ids_to_insert.add(t.id) # Database executions. logger.debug(f"Deleting {len(unknown_cached_tracks)} unknown tracks from cache") @@ -703,7 +703,7 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None: ( release.id, str(release.source_path), - str(release.cover_image_path), + str(release.cover_image_path) if release.cover_image_path else None, release.datafile_mtime, release.virtual_dirname, release.title, @@ -713,7 +713,7 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None: release.new, release.formatted_artists, str(release.source_path), - str(release.cover_image_path), + str(release.cover_image_path) if release.cover_image_path else None, release.datafile_mtime, release.virtual_dirname, release.title, @@ -750,13 +750,10 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None: ) for track in tracks: - if track.id not in track_ids_to_upsert: + if track.id not in track_ids_to_insert: continue - # There should never be an upsert case, because when a track goes bad, we delete it - # from the database. We don't update it in place. This is because we lack stable IDs - # for tracks across refreshes. - logger.debug(f"Inserting dirty track in database: {track.source_path}") + logger.debug(f"Upserting dirty track in database: {track.source_path}") conn.execute( """ INSERT INTO tracks ( @@ -772,6 +769,16 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None: , formatted_artists ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT (id) DO UPDATE SET + source_path = ? + , source_mtime = ? + , virtual_filename = ? + , title = ? + , release_id = ? + , track_number = ? + , disc_number = ? + , duration_seconds = ? + , formatted_artists = ? """, ( track.id, @@ -784,6 +791,15 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None: track.disc_number, track.duration_seconds, track.formatted_artists, + str(track.source_path), + track.source_mtime, + track.virtual_filename, + track.title, + track.release_id, + track.track_number, + track.disc_number, + track.duration_seconds, + track.formatted_artists, ), ) for art in track.artists: