Merge pull request #1204 from spotDL/dev

* added disc number metadata (#1195) Authored by @xnetcat
* fixed ffmpeg installation for tests (#1200) Authored by @xnetcat
* skip results without videoId (#1202) Authored by @xnetcat
* Add Lyrics Support (#1201) Authored by @s1as3r
* Bump Version Number to 3.4.0 @Silverarmor
* @aklajnert's Allow specifying output directory (#1207) Authored by @aklajnert
* get all artist tracks (#1208) Authored by @xnetcat

Co-authored-by: Jakub <42355410+xnetcat@users.noreply.github.com>
Co-authored-by: Arbaaz Shafiq <arbaazshafiq@gmail.com>
Co-authored-by: Silverarmor <23619946+Silverarmor@users.noreply.github.com>
spotDL · Mar 16, 2021 · dd57613 · dd57613
2 parents caabc72 + 38d6f7d
commit dd57613
Show file tree

Hide file tree

Showing 16 changed files with 63,501 additions and 22,769 deletions.
diff --git a/.github/workflows/spotify-downloader-ci.yml b/.github/workflows/spotify-downloader-ci.yml
@@ -73,7 +73,7 @@ jobs:
           python-version: 3.8
       - name: Install dependencies
         run: |
-          sudo add-apt-repository ppa:jonathonf/ffmpeg-4 -y
+          sudo add-apt-repository ppa:savoury1/ffmpeg4 -y
           sudo apt-get update
           sudo apt install ffmpeg -y
           python -m pip install -e .[test]

diff --git a/setup.cfg b/setup.cfg
@@ -1,5 +1,5 @@
 [metadata]
-version = 3.3.3
+version = 3.4.0
 
 name = spotdl
 url = https://github.com/spotDL/spotify-downloader
@@ -35,6 +35,8 @@ install_requires =
     mutagen
     ytmusicapi
     tqdm
+    bs4
+    requests
 python_requires = >=3.6
 packages = find:
 
@@ -59,3 +61,4 @@ ignore_missing_imports = True
 
 [flake8]
 max-line-length = 100
+ignore = E301
diff --git a/spotdl/__init__.py b/spotdl/__init__.py
@@ -1,4 +1,4 @@
-from .__main__ import console_entry_point	
+from .__main__ import console_entry_point
 
 __all__ = [
     'console_entry_point',

diff --git a/spotdl/__main__.py b/spotdl/__main__.py
@@ -1,12 +1,19 @@
 #! Basic necessities to get the CLI running
 import argparse
+import os
+import sys
 
 # ! The actual download stuff
 from spotdl.download.downloader import DownloadManager
 from spotdl.search import spotifyClient
 from spotdl.search.songObj import SongObj
 # ! Song Search from different start points
-from spotdl.search.utils import get_playlist_tracks, get_album_tracks, search_for_song
+from spotdl.search.utils import (
+    get_playlist_tracks,
+    get_album_tracks,
+    get_artist_tracks,
+    search_for_song,
+)
 
 # ! Usage is simple - call:
 #   'python __main__.py <links, search terms, tracking files separated by spaces>
@@ -88,6 +95,12 @@ def console_entry_point():
         clientSecret='0f02b7c483c04257984695007a4a8d5c'
     )
 
+    if arguments.path:
+        if not os.path.isdir(arguments.path):
+            sys.exit("The output directory doesn't exist.")
+        print(f"Will download to: {os.path.abspath(arguments.path)}")
+        os.chdir(arguments.path)
+
     downloader = DownloadManager()
 
     for request in arguments.url:
@@ -114,6 +127,12 @@ def console_entry_point():
 
             downloader.download_multiple_songs(songObjList)
 
+        elif 'open.spotify.com' in request and 'artist' in request:
+            print('Fetching artist...')
+            artistObjList = get_artist_tracks(request)
+
+            downloader.download_multiple_songs(artistObjList)
+
         elif request.endswith('.spotdlTrackingFile'):
             print('Preparing to resume download...')
             downloader.resume_download_from_tracking_file(request)
@@ -136,7 +155,8 @@ def parse_arguments():
         description=help_notice,
         formatter_class=argparse.RawDescriptionHelpFormatter,
     )
-    parser.add_argument("url", type=str, nargs="+")
+    parser.add_argument("url", type=str, nargs="+", help="URL to a song/album/playlist")
+    parser.add_argument("-o", "--output", help="Output directory path", dest="path")
 
     return parser.parse_args()
 

diff --git a/spotdl/download/downloader.py b/spotdl/download/downloader.py
@@ -10,7 +10,7 @@
 from urllib.request import urlopen
 
 from mutagen.easyid3 import EasyID3, ID3
-from mutagen.id3 import APIC as AlbumCover
+from mutagen.id3 import APIC as AlbumCover, USLT
 from pytube import YouTube
 
 from spotdl.download.progressHandlers import DisplayManager, DownloadTracker
@@ -270,6 +270,8 @@ def set_id3_data(self, convertedFilePath, songObj):
         audioFile['titlesort'] = songObj.get_song_name()
         # ! track number
         audioFile['tracknumber'] = str(songObj.get_track_number())
+        # ! disc number
+        audioFile['discnumber'] = str(songObj.get_disc_number())
         # ! genres (pretty pointless if you ask me)
         # ! we only apply the first available genre as ID3 v2.3 doesn't support multiple
         # ! genres and ~80% of the world PC's run Windows - an OS with no ID3 v2.4 support
@@ -298,6 +300,11 @@ def set_id3_data(self, convertedFilePath, songObj):
             desc='Cover',
             data=rawAlbumArt
         )
+        # ! setting the lyrics
+        lyrics = songObj.get_lyrics()
+        USLTOutput = USLT(encoding=3, lang=u'eng', desc=u'desc', text=lyrics)
+        audioFile["USLT::'eng'"] = USLTOutput
+
         audioFile.save(v2_version=3)
 
     def close(self) -> None:

diff --git a/spotdl/search/provider.py b/spotdl/search/provider.py
@@ -9,6 +9,8 @@
 
 from rapidfuzz.fuzz import partial_ratio
 from ytmusicapi import YTMusic
+from bs4 import BeautifulSoup
+from requests import get
 
 
 # ================================
@@ -88,18 +90,22 @@ def _parse_duration(duration: str) -> float:
 
 
 def _map_result_to_song_data(result: dict) -> dict:
-    artists = ", ".join(map(lambda a: a['name'], result['artists']))
-    video_id = result['videoId']
-    song_data = {
-        'name': result['title'],
-        'type': result['resultType'],
-        'artist': artists,
-        'length': _parse_duration(result.get('duration', None)),
-        'link': f'https://www.youtube.com/watch?v={video_id}',
-        'position': 0
-    }
-    if 'album' in result:
-        song_data['album'] = result['album']['name']
+    song_data = {}
+    if result['resultType'] in ['song', 'video']:
+        artists = ", ".join(map(lambda a: a['name'], result['artists']))
+        video_id = result['videoId']
+        if video_id is None:
+            return {}
+        song_data = {
+            'name': result['title'],
+            'type': result['resultType'],
+            'artist': artists,
+            'length': _parse_duration(result.get('duration', None)),
+            'link': f'https://www.youtube.com/watch?v={video_id}',
+            'position': 0
+        }
+        if 'album' in result:
+            song_data['album'] = result['album']['name']
 
     return song_data
 
@@ -119,7 +125,7 @@ def _query_and_simplify(searchTerm: str) -> List[dict]:
     # build and POST a query to YTM
 
     print(f'Searching for: {searchTerm}')
-    searchResult = ytmApiClient.search(searchTerm, filter='videos')
+    searchResult = ytmApiClient.search(searchTerm)
 
     return list(map(_map_result_to_song_data, searchResult))
 
@@ -152,6 +158,11 @@ def search_and_order_ytm_results(songName: str, songArtists: List[str],
     linksWithMatchValue = {}
 
     for result in results:
+        # ! skip results without videoId, this happens if you are country restricted or
+        # ! video is unavailable
+        if result == {}:
+            continue
+
         # ! If there are no common words b/w the spotify and YouTube Music name, the song
         # ! is a wrong match (Like Ruelle - Madness being matched to Ruelle - Monster, it
         # ! happens without this conditional)
@@ -266,3 +277,43 @@ def search_and_get_best_match(songName: str, songArtists: List[str],
     # ! In theory, the first 'TUPLE' in sortedResults should have the highest match
     # ! value, we send back only the link
     return sortedResults[0][0]
+
+
+def get_song_lyrics(song_name: str, song_artists: List[str]) -> str:
+    """
+    `str` `song_name` : name of song
+
+    `list<str>` `song_artists` : list containing name of contributing artists
+
+    RETURNS `str`: Lyrics of the song.
+
+    Gets the lyrics of the song from Genius.
+    """
+
+    headers = {
+        'Authorization': 'Bearer alXXDbPZtK1m2RrZ8I4k2Hn8Ahsd0Gh_o076HYvcdlBvmc0ULL1H8Z8xRlew5qaG',
+    }
+    api_search_url = 'https://api.genius.com/search'
+    search_query = f'{song_name} {", ".join(song_artists)}'
+
+    api_response = get(
+        api_search_url,
+        params={'q': search_query},
+        headers=headers
+    ).json()
+
+    song_id = api_response['response']['hits'][0]['result']['id']
+    song_api_url = f'https://api.genius.com/songs/{song_id}'
+
+    api_response = get(
+        song_api_url,
+        headers=headers
+    ).json()
+
+    song_url = api_response['response']['song']['url']
+
+    genius_page = get(song_url)
+    soup = BeautifulSoup(genius_page.text, 'html.parser')
+    lyrics = soup.select_one('div.lyrics').get_text()
+
+    return lyrics.strip()
diff --git a/spotdl/search/songObj.py b/spotdl/search/songObj.py
@@ -1,6 +1,6 @@
 from typing import List
 
-from spotdl.search.provider import search_and_get_best_match
+from spotdl.search.provider import search_and_get_best_match, get_song_lyrics
 from spotdl.search.spotifyClient import get_spotify_client
 
 
@@ -12,11 +12,12 @@ class SongObj():
     # ====================
     # === Constructors ===
     # ====================
-    def __init__(self, rawTrackMeta, rawAlbumMeta, rawArtistMeta, youtubeLink):
+    def __init__(self, rawTrackMeta, rawAlbumMeta, rawArtistMeta, youtubeLink, lyrics):
         self.__rawTrackMeta = rawTrackMeta
         self.__rawAlbumMeta = rawArtistMeta
         self.__rawArtistMeta = rawArtistMeta
         self.__youtubeLink = youtubeLink
+        self.__lyrics = lyrics
 
     # ! constructors here are a bit mucky, there are two different constructors for two
     # ! different use cases, hence the actual __init__ function does not exist
@@ -63,9 +64,16 @@ def from_url(cls, spotifyURL: str):
             duration
         )
 
+        # try to get lyrics from Genius
+        try:
+            lyrics = get_song_lyrics(songName, contributingArtists)
+        except (AttributeError, IndexError):
+            lyrics = ""
+
         return cls(
             rawTrackMeta, rawAlbumMeta,
-            rawArtistMeta, youtubeLink
+            rawArtistMeta, youtubeLink,
+            lyrics
         )
 
     @classmethod
@@ -74,10 +82,12 @@ def from_dump(cls, dataDump: dict):
         rawAlbumMeta = dataDump['rawAlbumMeta']
         rawArtistMeta = dataDump['rawAlbumMeta']
         youtubeLink = dataDump['youtubeLink']
+        lyrics = dataDump['lyrics']
 
         return cls(
             rawTrackMeta, rawAlbumMeta,
-            rawArtistMeta, youtubeLink
+            rawArtistMeta, youtubeLink,
+            lyrics
         )
 
     def __eq__(self, comparedSong) -> bool:
@@ -149,6 +159,17 @@ def get_contributing_artists(self) -> List[str]:
             contributingArtists.append(artist['name'])
 
         return contributingArtists
+    # ! 6. Disc Number
+    def get_disc_number(self) -> int:
+        return self.__rawTrackMeta['disc_number']
+
+    # ! 7. Lyrics
+    def get_lyrics(self):
+        '''
+        returns the lyrics of the song if found on Genius
+        '''
+
+        return self.__lyrics
 
     # ! Album Details:
 
@@ -213,5 +234,6 @@ def get_data_dump(self) -> dict:
             'youtubeLink': self.__youtubeLink,
             'rawTrackMeta': self.__rawTrackMeta,
             'rawAlbumMeta': self.__rawAlbumMeta,
-            'rawArtistMeta': self.__rawArtistMeta
+            'rawArtistMeta': self.__rawArtistMeta,
+            'lyrics': self.__lyrics
         }
diff --git a/spotdl/search/utils.py b/spotdl/search/utils.py
@@ -65,6 +65,71 @@ def get_album_tracks(albumUrl: str) -> List[SongObj]:
     return albumTracks
 
 
+def get_artist_tracks(artistUrl: str) -> List[SongObj]:
+    '''
+    `str` `artistUrl` : Spotify Url of the artist whose tracks are to be
+    retrieved
+
+    returns a `list<songObj>` containing each track in the artist profile
+    '''
+
+    spotifyClient = get_spotify_client()
+    artistTracks = []
+    offset = 0
+
+    artistResponse = spotifyClient.artist_albums(artistUrl)
+
+    # while loop acts like do-while
+    while True:
+        for album in artistResponse['items']:
+            # get albums and singles
+            if not (
+                album['album_group'] == 'appears_on' and album['album_type'] in [
+                    'album', 'compilation']
+            ):
+                artistTracks.extend(get_album_tracks(album['id']))
+            # get features from other artists' albums
+            elif album['album_group'] == 'appears_on' and album['album_type'] == 'album':
+                trackResponse = spotifyClient.album_tracks(album['uri'])
+                albumTracks = []
+
+                # while loop acts like do-while
+                while True:
+                    for track in trackResponse['items']:
+                        for artist in track['artists']:
+                            if artist['id'] == artistResponse['href'].split('/')[-2]:
+                                song = SongObj.from_url(
+                                    'https://open.spotify.com/track/' + track['id']
+                                )
+
+                                if song.get_youtube_link() is not None:
+                                    albumTracks.append(song)
+
+                    # check if there are more tracks to fetch
+                    if trackResponse['next']:
+                        trackResponse = spotifyClient.album_tracks(
+                            album['uri'],
+                            offset=len(albumTracks)
+                        )
+                    else:
+                        break
+
+                artistTracks.extend(albumTracks)
+
+        offset += len(artistResponse['items'])
+
+        # check if there are more albums to fetch
+        if artistResponse['next']:
+            artistResponse = spotifyClient.artist_albums(
+                artistUrl,
+                offset=offset
+            )
+        else:
+            break
+
+    return artistTracks
+
+
 def get_playlist_tracks(playlistUrl: str) -> List[SongObj]:
     '''
     `str` `playlistUrl` : Spotify Url of the album whose tracks are to be