From 7e7e4dd2738aa5a163210c5d147222a5ba78988d Mon Sep 17 00:00:00 2001 From: Jakub Kot <42355410+xnetcat@users.noreply.github.com> Date: Fri, 30 Jul 2021 14:50:28 +0200 Subject: [PATCH 1/4] bugfix: fixed m3u issues (#1357) * bugfix: fixed m3u issues - use sanitized filenames in m3u - fixed playlist generation on first run * misc: removed flake8 warnings * misc: pep8 variables * misc: moved functions to provider utils * bugfix: lower song names * bugfix: improved m3u generation * misc: format with black * bugfix: fixed wrong filenames, added support for albums --- spotdl/download/downloader.py | 73 +----------------------- spotdl/parsers/query_parser.py | 2 +- spotdl/providers/provider_utils.py | 70 ++++++++++++++++++++++- spotdl/providers/yt_provider.py | 4 +- spotdl/providers/ytm_provider.py | 6 +- spotdl/search/song_gatherer.py | 92 ++++++++++++++++++++++++++---- spotdl/search/song_object.py | 14 ++--- 7 files changed, 165 insertions(+), 96 deletions(-) diff --git a/spotdl/download/downloader.py b/spotdl/download/downloader.py index 9a482d1ac..33faf1497 100644 --- a/spotdl/download/downloader.py +++ b/spotdl/download/downloader.py @@ -11,6 +11,7 @@ from spotdl.search import SongObject from spotdl.download.progress_ui_handler import YTDLLogger from spotdl.download import ffmpeg, set_id3_data, DisplayManager, DownloadTracker +from spotdl.providers.provider_utils import _get_converted_file_path class DownloadManager: @@ -282,75 +283,3 @@ def _perform_audio_download( temp_file.unlink() raise e - - -# ======================== -# === Helper function === -# ======================== - - -def _sanitize_filename(input_str: str) -> str: - output = input_str - - # ! this is windows specific (disallowed chars) - output = "".join(char for char in output if char not in "/?\\*|<>") - - # ! double quotes (") and semi-colons (:) are also disallowed characters but we would - # ! 
like to retain their equivalents, so they aren't removed in the prior loop - output = output.replace('"', "'").replace(":", "-") - - return output - - -def _get_smaller_file_path(input_song: SongObject, output_format: str) -> Path: - # Only use the first artist if the song path turns out to be too long - smaller_name = f"{input_song.contributing_artists[0]} - {input_song.song_name}" - - smaller_name = _sanitize_filename(smaller_name) - - try: - return Path(f"{smaller_name}.{output_format}").resolve() - except (OSError, WindowsError): - # Expected to happen in the rare case when the saved path is too long, - # even with the short filename - raise OSError("Cannot save song due to path issues.") - - -def _get_converted_file_path(song_obj: SongObject, output_format: str = None) -> Path: - - # ! we eliminate contributing artist names that are also in the song name, else we - # ! would end up with things like 'Jetta, Mastubs - I'd love to change the world - # ! (Mastubs REMIX).mp3' which is kinda an odd file name. - - # also make sure that main artist is included in artistStr even if they - # are in the song name, for example - # Lil Baby - Never Recover (Lil Baby & Gunna, Drake).mp3 - - artists_filtered = [] - - if output_format is None: - output_format = "mp3" - - for artist in song_obj.contributing_artists: - if artist.lower() not in song_obj.song_name: - artists_filtered.append(artist) - elif artist.lower() is song_obj.contributing_artists[0].lower(): - artists_filtered.append(artist) - - artist_str = ", ".join(artists_filtered) - - converted_file_name = _sanitize_filename( - f"{artist_str} - {song_obj.song_name}.{output_format}" - ) - - converted_file_path = Path(converted_file_name) - - # ! Checks if a file name is too long (256 max on both linux and windows) - try: - if len(str(converted_file_path.resolve().name)) > 256: - print("Path was too long. 
Using Small Path.") - return _get_smaller_file_path(song_obj, output_format) - except (OSError, WindowsError): - return _get_smaller_file_path(song_obj, output_format) - - return converted_file_path diff --git a/spotdl/parsers/query_parser.py b/spotdl/parsers/query_parser.py index a04768233..be2c7e255 100644 --- a/spotdl/parsers/query_parser.py +++ b/spotdl/parsers/query_parser.py @@ -64,7 +64,7 @@ def parse_request( elif "open.spotify.com" in request and "album" in request: print("Fetching Album...") song_list = song_gatherer.from_album( - request, output_format, use_youtube, threads + request, output_format, use_youtube, generate_m3u, threads ) elif "open.spotify.com" in request and "playlist" in request: print("Fetching Playlist...") diff --git a/spotdl/providers/provider_utils.py b/spotdl/providers/provider_utils.py index 421c8dffb..f4c7d6ce5 100644 --- a/spotdl/providers/provider_utils.py +++ b/spotdl/providers/provider_utils.py @@ -3,6 +3,7 @@ from typing import List from rapidfuzz import fuzz from bs4 import BeautifulSoup +from pathlib import Path def _match_percentage(str1: str, str2: str, score_cutoff: float = 0) -> float: @@ -60,7 +61,7 @@ def _parse_duration(duration: str) -> float: def _create_song_title(song_name: str, song_artists: List[str]) -> str: joined_artists = ", ".join(song_artists) - return f"{joined_artists} - {song_name}".lower() + return f"{joined_artists} - {song_name}" def _get_song_lyrics(song_name: str, song_artists: List[str]) -> str: @@ -102,3 +103,70 @@ def _get_song_lyrics(song_name: str, song_artists: List[str]) -> str: return "" except: # noqa: E722 return "" + + +def _sanitize_filename(input_str: str) -> str: + output = input_str + + # ! this is windows specific (disallowed chars) + output = "".join(char for char in output if char not in "/?\\*|<>") + + # ! double quotes (") and semi-colons (:) are also disallowed characters but we would + # ! 
like to retain their equivalents, so they aren't removed in the prior loop + output = output.replace('"', "'").replace(":", "-") + + return output + + +def _get_smaller_file_path(input_song, output_format: str) -> Path: + # Only use the first artist if the song path turns out to be too long + smaller_name = f"{input_song.contributing_artists[0]} - {input_song.song_name}" + + smaller_name = _sanitize_filename(smaller_name) + + try: + return Path(f"{smaller_name}.{output_format}").resolve() + except (OSError, WindowsError): + # Expected to happen in the rare case when the saved path is too long, + # even with the short filename + raise OSError("Cannot save song due to path issues.") + + +def _get_converted_file_path(song_obj, output_format: str = None) -> Path: + + # ! we eliminate contributing artist names that are also in the song name, else we + # ! would end up with things like 'Jetta, Mastubs - I'd love to change the world + # ! (Mastubs REMIX).mp3' which is kinda an odd file name. + + # also make sure that main artist is included in artistStr even if they + # are in the song name, for example + # Lil Baby - Never Recover (Lil Baby & Gunna, Drake).mp3 + + artists_filtered = [] + + if output_format is None: + output_format = "mp3" + + for artist in song_obj.contributing_artists: + if artist.lower() not in song_obj.song_name: + artists_filtered.append(artist) + elif artist.lower() is song_obj.contributing_artists[0].lower(): + artists_filtered.append(artist) + + artist_str = ", ".join(artists_filtered) + + converted_file_name = _sanitize_filename( + f"{artist_str} - {song_obj.song_name}.{output_format}" + ) + + converted_file_path = Path(converted_file_name) + + # ! Checks if a file name is too long (256 max on both linux and windows) + try: + if len(str(converted_file_path.resolve().name)) > 256: + print("Path was too long. 
Using Small Path.") + return _get_smaller_file_path(song_obj, output_format) + except (OSError, WindowsError): + return _get_smaller_file_path(song_obj, output_format) + + return converted_file_path diff --git a/spotdl/providers/yt_provider.py b/spotdl/providers/yt_provider.py index a58a16abb..adb96bc6d 100644 --- a/spotdl/providers/yt_provider.py +++ b/spotdl/providers/yt_provider.py @@ -45,7 +45,7 @@ def search_and_get_best_match( if isrc_result is not None and isrc_result.watch_url is not None: return isrc_result.watch_url - song_title = _create_song_title(song_name, song_artists) + song_title = _create_song_title(song_name, song_artists).lower() # Query YTM by songs only first, this way if we get correct result on the first try # we don't have to make another request to ytmusic api that could result in us @@ -126,7 +126,7 @@ def _order_yt_results( continue artist_match = (artist_match_number / len(song_artists)) * 100 - song_title = _create_song_title(song_name, song_artists) + song_title = _create_song_title(song_name, song_artists).lower() name_match = round( _match_percentage( unidecode(result.title.lower()), unidecode(song_title), 60 diff --git a/spotdl/providers/ytm_provider.py b/spotdl/providers/ytm_provider.py index e3b942755..8b56b36b4 100644 --- a/spotdl/providers/ytm_provider.py +++ b/spotdl/providers/ytm_provider.py @@ -61,7 +61,7 @@ def search_and_get_best_match( ): return isrc_result["link"] - song_title = _create_song_title(song_name, song_artists) + song_title = _create_song_title(song_name, song_artists).lower() # Query YTM by songs only first, this way if we get correct result on the first try # we don't have to make another request to ytmusic api that could result in us @@ -85,7 +85,7 @@ def search_and_get_best_match( # We didn't find the correct song on the first try so now we get video type results # add them to song_results, and get the result with highest score video_results = _query_and_simplify( - _create_song_title(song_name, 
song_artists), filter="videos" + _create_song_title(song_name, song_artists).lower(), filter="videos" ) # Order video results @@ -194,7 +194,7 @@ def _order_ytm_results( artist_match = (artist_match_number / len(song_artists)) * 100 - song_title = _create_song_title(song_name, song_artists) + song_title = _create_song_title(song_name, song_artists).lower() # Find name match and drop results below 60% # this needs more testing diff --git a/spotdl/search/song_gatherer.py b/spotdl/search/song_gatherer.py index 4a7fb0e91..b8f19aaa2 100644 --- a/spotdl/search/song_gatherer.py +++ b/spotdl/search/song_gatherer.py @@ -10,6 +10,7 @@ provider_utils, ) from spotdl.search import SongObject, SpotifyClient +from spotdl.providers.provider_utils import _get_converted_file_path def from_spotify_url( @@ -124,6 +125,7 @@ def from_album( album_url: str, output_format: str = None, use_youtube: bool = False, + generate_m3u: bool = False, threads: int = 1, ) -> List[SongObject]: """ @@ -163,20 +165,77 @@ def from_album( def get_tracks(track): try: - return from_spotify_url( + song = from_spotify_url( "https://open.spotify.com/track/" + track["id"], output_format, use_youtube, ) - except (LookupError, OSError, ValueError): - return None + + if generate_m3u: + file_path = _get_converted_file_path(song, output_format) + + return song, f"{file_path}\n" + + return song, None + except (LookupError, ValueError): + return None, None + except OSError: + if generate_m3u: + file_path = ( + str( + provider_utils._create_song_title( + track["name"], + [artist["name"] for artist in track["artists"]], + ) + ) + + "." + + output_format + if output_format is not None + else "mp3" + ) + + if len(file_path) > 256: + file_path = ( + str( + provider_utils._create_song_title( + track["name"], [track["artists"][0]["name"]] + ) + ) + + "." 
+ + output_format + if output_format is not None + else "mp3" + ) + + return None, f"{file_path}\n" + + return None, None with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor: results = executor.map(get_tracks, album_tracks) - for song in results: - if song is not None and song.youtube_link is not None: - tracks.append(song) + album_text = "" + for result in results: + if result[1] is not None: + album_text += "".join(char for char in result[1] if char not in "/?\\*|<>") + + if result[0] is not None and result[0].youtube_link is not None: + tracks.append(result[0]) + + if album_response and generate_m3u is True: + album_data = spotify_client.album(album_url) + + if album_data is not None: + album_name = album_data["name"] + else: + album_name = album_tracks[0]["name"] + + album_name = "".join(char for char in album_name if char not in "/?\\*|<>") + + album_file = Path(f"{album_name}.m3u") + + with open(album_file, "w", encoding="utf-8") as file: + file.write(album_text) return tracks @@ -239,6 +298,13 @@ def get_song(track): output_format, use_youtube, ) + + if generate_m3u: + file_path = _get_converted_file_path(song, output_format) + + return song, f"{file_path}\n" + + return song, None except (LookupError, ValueError): return None, None except OSError: @@ -255,11 +321,13 @@ def get_song(track): if output_format is not None else "mp3" ) + if len(file_path) > 256: file_path = ( str( provider_utils._create_song_title( - track.song_name, [track.contributing_artists[0]] + track["track"]["name"], + [track["track"]["artists"][0]["name"]], ) ) + "." 
@@ -271,8 +339,6 @@ def get_song(track): return None, f"{file_path}\n" return None, None - else: - return song, None with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor: results = executor.map(get_song, playlist_tracks) @@ -280,7 +346,9 @@ def get_song(track): playlist_text = "" for result in results: if result[1] is not None: - playlist_text += result[1] + playlist_text += "".join( + char for char in result[1] if char not in "/?\\*|<>" + ) if result[0] is not None and result[0].youtube_link is not None: tracks.append(result[0]) @@ -293,6 +361,10 @@ def get_song(track): else: playlist_name = playlist_tracks[0]["track"]["name"] + playlist_name = "".join( + char for char in playlist_name if char not in "/?\\*|<>" + ) + playlist_file = Path(f"{playlist_name}.m3u") with open(playlist_file, "w", encoding="utf-8") as file: diff --git a/spotdl/search/song_object.py b/spotdl/search/song_object.py index 4d2f674f0..161cb507b 100644 --- a/spotdl/search/song_object.py +++ b/spotdl/search/song_object.py @@ -183,24 +183,24 @@ def file_name(self) -> str: def create_file_name(song_name: str, song_artists: List[str]) -> str: # build file name of converted file # the main artist is always included - artistStr = song_artists[0] + artist_string = song_artists[0] # ! we eliminate contributing artist names that are also in the song name, else we # ! would end up with things like 'Jetta, Mastubs - I'd love to change the world # ! (Mastubs REMIX).mp3' which is kinda an odd file name. for artist in song_artists[1:]: if artist.lower() not in song_name.lower(): - artistStr += ", " + artist + artist_string += ", " + artist - convertedFileName = artistStr + " - " + song_name + converted_file_name = artist_string + " - " + song_name # ! 
this is windows specific (disallowed chars) - convertedFileName = "".join( - char for char in convertedFileName if char not in "/?\\*|<>" + converted_file_name = "".join( + char for char in converted_file_name if char not in "/?\\*|<>" ) # ! double quotes (") and semi-colons (:) are also disallowed characters but we would # ! like to retain their equivalents, so they aren't removed in the prior loop - convertedFileName = convertedFileName.replace('"', "'").replace(":", "-") + converted_file_name = converted_file_name.replace('"', "'").replace(":", "-") - return convertedFileName + return converted_file_name From b3c6df7c6291258ce315574edbc8f70e69c8c1a4 Mon Sep 17 00:00:00 2001 From: Jakub Kot <42355410+xnetcat@users.noreply.github.com> Date: Fri, 30 Jul 2021 14:51:20 +0200 Subject: [PATCH 2/4] bugfix: remove duplicate songs from songs_list (#1356) --- spotdl/parsers/query_parser.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/spotdl/parsers/query_parser.py b/spotdl/parsers/query_parser.py index be2c7e255..f91fe24b4 100644 --- a/spotdl/parsers/query_parser.py +++ b/spotdl/parsers/query_parser.py @@ -26,7 +26,15 @@ def parse_query( # linefeed to visually separate output for each query print() - return songs_list + # remove duplicates + seen_songs = set() + songs = [] + for song in songs_list: + if song.file_name not in seen_songs: + songs.append(song) + seen_songs.add(song.file_name) + + return songs def parse_request( From 2cb3738d5bd9f6c33c3113a730c94c2b7a3bc198 Mon Sep 17 00:00:00 2001 From: Jakub Kot <42355410+xnetcat@users.noreply.github.com> Date: Fri, 30 Jul 2021 14:53:20 +0200 Subject: [PATCH 3/4] bugfix: fixed ytdl error reporting (#1360) * bugfix: fixed error ytdl error reporting * bugfix: fixed ffmpeg error message --- spotdl/download/downloader.py | 2 -- spotdl/download/ffmpeg.py | 2 +- spotdl/download/progress_ui_handler.py | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/spotdl/download/downloader.py 
b/spotdl/download/downloader.py index 33faf1497..bc8d0ae5d 100644 --- a/spotdl/download/downloader.py +++ b/spotdl/download/downloader.py @@ -177,7 +177,6 @@ async def download_song(self, song_object: SongObject) -> None: "outtmpl": f"{str(temp_folder)}/%(id)s.%(ext)s", "quiet": True, "no_warnings": True, - "ignoreerrors": True, "logger": YTDLLogger(), "progress_hooks": [display_progress_tracker.ytdl_progress_hook] if display_progress_tracker @@ -277,7 +276,6 @@ def _perform_audio_download( except Exception as e: # noqa:E722 # ! This is equivalent to a failed download, we do nothing, the song remains on # ! download_trackers download queue and all is well... - temp_files = Path(temp_folder).glob(f"{converted_file_name}.*") for temp_file in temp_files: temp_file.unlink() diff --git a/spotdl/download/ffmpeg.py b/spotdl/download/ffmpeg.py index bccae9088..365bb6957 100644 --- a/spotdl/download/ffmpeg.py +++ b/spotdl/download/ffmpeg.py @@ -102,7 +102,7 @@ async def convert( proc_out = await process.communicate() - if proc_out[0] and proc_out[1]: + if proc_out[0] or proc_out[1]: out = str(b"".join(proc_out)) else: out = "" diff --git a/spotdl/download/progress_ui_handler.py b/spotdl/download/progress_ui_handler.py index 6be502781..f0f4cf98c 100644 --- a/spotdl/download/progress_ui_handler.py +++ b/spotdl/download/progress_ui_handler.py @@ -42,7 +42,7 @@ def warning(self, msg): pass def error(self, msg): - pass + raise Exception(msg) class SizedTextColumn(ProgressColumn): From e97c94a29a6009536d8920d78fe19bf4720b9f8b Mon Sep 17 00:00:00 2001 From: Silverarmor <23619946+Silverarmor@users.noreply.github.com> Date: Sat, 31 Jul 2021 00:55:05 +1200 Subject: [PATCH 4/4] Remembered to bump version number to 3.7.2 Are you proud of me? 
I actually did it before merging :) --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 86348fba4..4e1c219bd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [metadata] -version = 3.7.1 +version = 3.7.2 name = spotdl url = https://github.com/spotDL/spotify-downloader