From 44783b295b5911a8c1e29a0890acad49de6a2348 Mon Sep 17 00:00:00 2001
From: edward <ed@zervice.us>
Date: Mon, 10 Aug 2020 19:20:35 +0800
Subject: [PATCH 1/2] =?UTF-8?q?1.=20=E6=94=AF=E6=8C=81b=E7=AB=99=E5=AD=97?=
 =?UTF-8?q?=E5=B9=95=E4=B8=8B=E8=BD=BD=20=202.=20=E4=B8=8D=E4=B8=8B?=
 =?UTF-8?q?=E5=BC=B9=E5=B9=95=20=203.=20b=E7=AB=99=E5=8F=AF=E8=83=BD?=
 =?UTF-8?q?=E6=8F=8F=E8=BF=B0=E5=BE=88=E9=95=BF=E6=97=B6=E6=97=A0=E6=B3=95?=
 =?UTF-8?q?=E4=B8=8B=E8=BD=BD=E6=96=87=E4=BB=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/you_get/extractor.py           |  6 ----
 src/you_get/extractors/bilibili.py | 57 ++++++++++++++++++++++++++++++
 src/you_get/util/fs.py             |  2 +-
 3 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/src/you_get/extractor.py b/src/you_get/extractor.py
index c4315935e7..83742f0fab 100644
--- a/src/you_get/extractor.py
+++ b/src/you_get/extractor.py
@@ -253,12 +253,6 @@ def download(self, **kwargs):
                     x.write(srt)
                 print('Done.')
 
-            if self.danmaku is not None and not dry_run:
-                filename = '{}.cmt.xml'.format(get_filename(self.title))
-                print('Downloading {} ...\n'.format(filename))
-                with open(os.path.join(kwargs['output_dir'], filename), 'w', encoding='utf8') as fp:
-                    fp.write(self.danmaku)
-
             if self.lyrics is not None and not dry_run:
                 filename = '{}.lrc'.format(get_filename(self.title))
                 print('Downloading {} ...\n'.format(filename))
diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index 94e5479f65..620b0ff535 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+import math
 from ..common import *
 from ..extractor import VideoExtractor
 
@@ -131,6 +132,8 @@ def url_size(url, faker=False, headers={},err_value=0):
         except:
             return err_value
 
+    # Example subtitle-metadata request (player.so): https://api.bilibili.com/x/player.so?id=cid%3A162260003&aid=95051759&bvid=BV1zE411T7nb&buvid=FB2BB46F-B1F3-4BDA-A589-33348940411A155830infoc
+
     def prepare(self, **kwargs):
         self.stream_qualities = {s['quality']: s for s in self.stream_types}
 
@@ -569,6 +572,21 @@ def extract(self, **kwargs):
             # extract stream with the best quality
             stream_id = self.streams_sorted[0]['id']
 
+    def formattime(t):
+        """Return int(t) as a string, zero-padded to at least two digits."""
+        # `t/10 == 0` is true division on Python 3, so it only matched t == 0
+        # and the values 1-9 were never padded; zfill pads them all.
+        return str(int(t)).zfill(2)
+
+    def ms2time(t):
+        """Format a duration of t milliseconds as 'MM:SS.mmm'."""
+        # divmod keeps every field an int; Python 3 `/` yielded floats here.
+        # Self-contained on purpose: the bare name `formattime` may not
+        # resolve from here if these defs sit in a class body (TODO confirm).
+        minutes, rest = divmod(int(t), 60000)
+        return '%02d:%02d.%03d' % ((minutes,) + divmod(rest, 1000))
+
+
     def download_playlist_by_url(self, url, **kwargs):
         self.url = url
         kwargs['playlist'] = True
@@ -664,12 +682,51 @@ def download_playlist_by_url(self, url, **kwargs):
                 p = int(match1(self.url, r'[\?&]p=(\d+)') or match1(self.url, r'/index_(\d+)') or '1')-1
                 for pi in range(p,pn):
                     self.prepare_by_cid(aid,initial_state['videoData']['pages'][pi]['cid'],'%s (P%s. %s)' % (initial_state['videoData']['title'], pi+1, initial_state['videoData']['pages'][pi]['part']),html_content,playinfo,playinfo_,url)
+                    tttt = self.title
                     try:
                         self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
                     except:
                         self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
                     self.extract(**kwargs)
                     self.download(**kwargs)
+                    # Fetch this part's subtitle (if any) and save it as <title>.srt.
+                    # The track list comes from the player.so endpoint, which is
+                    # queried with the video page as Referer.
+                    lrcurl = "https://api.bilibili.com/x/player.so?id=cid%3A" + str(initial_state['videoData']['pages'][pi]['cid']) + "&aid=" + str(aid) + "&bvid=" +initial_state['videoData']["bvid"]+"&buvid=FB2BB46F-B1F3-4BDA-A589-33348940411A155830infoc"
+                    jsonOfLrc = get_content(lrcurl, headers={"Referer": "https://www.bilibili.com/video/" + initial_state['videoData']["bvid"]})
+                    # Example line:
+                    # <subtitle>{"allow_submit":false,"lan":"","lan_doc":"","subtitles":[{"id":23916631605379079,"lan":"zh-CN","lan_doc":"中文（中国）","is_lock":false,"subtitle_url":"//i0.hdslb.com/bfs/subtitle/dfb81041cf92b5c2ebce2540cd14c9e49674f460.json"}]}</subtitle>
+                    subtitleMeta = match1(jsonOfLrc, r'<subtitle>(.*?)</subtitle>')
+                    # No <subtitle> element (match1 presumably returns None then): skip
+                    # the part instead of crashing in json.loads.
+                    subtitles = json.loads(subtitleMeta).get("subtitles") if subtitleMeta else None
+                    if subtitles:
+                        # Only the first listed subtitle track is fetched.
+                        subjson = get_content("https:" + subtitles[0]["subtitle_url"])
+
+                        def srt_time(ms):
+                            # SRT timestamps are HH:MM:SS,mmm (three-digit milliseconds).
+                            hh, rem = divmod(ms, 3600000)
+                            mm, rem = divmod(rem, 60000)
+                            ss, ms = divmod(rem, 1000)
+                            return '%02d:%02d:%02d,%03d' % (hh, mm, ss, ms)
+
+                        cues = []
+                        for i, data in enumerate(json.loads(subjson)['body'], 1):
+                            # 'from'/'to' are treated as (possibly fractional) seconds.
+                            start_ms = int(round(data['from'] * 1000))
+                            # End 1 ms early so back-to-back cues never show at the same
+                            # time, but never before the cue's own start.
+                            stop_ms = max(int(round(data['to'] * 1000)) - 1, start_ms)
+                            cues.append('{}\n{} --> {}\n{}\n\n'.format(
+                                i, srt_time(start_ms), srt_time(stop_ms), data['content']))
+                        srtfilename = '%s.srt' % get_filename(tttt)
+                        # Use output_dir like download() does; fall back to the CWD if unset.
+                        srtpath = os.path.join(kwargs.get('output_dir') or '.', srtfilename)
+                        with open(srtpath, 'w', encoding='utf-8') as f:
+                            # One join instead of growing a string cue by cue.
+                            f.write(''.join(cues))
+
                     # purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi+1)
                     # self.__class__().download_by_url(purl, **kwargs)
 
diff --git a/src/you_get/util/fs.py b/src/you_get/util/fs.py
index c04a10a74d..3ecd20e6e7 100644
--- a/src/you_get/util/fs.py
+++ b/src/you_get/util/fs.py
@@ -43,5 +43,5 @@ def legitimize(text, os=detect_os()):
         if text.startswith("."):
             text = text[1:]
 
-    text = text[:80] # Trim to 82 Unicode characters long
+    text = text[:160] # Trim to 160 Unicode characters long
     return text

From abd65c3c1479bbdcc280c198a852af0e4dd937ad Mon Sep 17 00:00:00 2001
From: ed <ed@zervice.us>
Date: Tue, 3 Sep 2024 09:44:30 +0800
Subject: [PATCH 2/2] bilibili download support

---
 src/you_get/extractors/bilibili.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index fb5444471d..8d694275be 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -1,4 +1,6 @@
 #!/usr/bin/env python
+import json
+import re
 
 from ..common import *
 from ..extractor import VideoExtractor
@@ -339,21 +341,15 @@ def prepare(self, **kwargs):
 
         # bangumi
         elif sort == 'bangumi':
-            initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)')  # FIXME
-            initial_state = json.loads(initial_state_text)
-
-            # warn if this bangumi has more than 1 video
-            epn = len(initial_state['epList'])
-            if epn > 1 and not kwargs.get('playlist'):
-                log.w('This bangumi currently has %s videos. (use --playlist to download all videos.)' % epn)
+            eposide = kwargs['eposide_data']
 
             # set video title
-            self.title = initial_state['h1Title']
+            self.title = eposide['long_title']
 
             # construct playinfos
-            ep_id = initial_state['epInfo']['id']
-            avid = initial_state['epInfo']['aid']
-            cid = initial_state['epInfo']['cid']
+            ep_id = eposide['ep_id']
+            avid = eposide['aid']
+            cid = eposide['cid']
             playinfos = []
             api_url = self.bilibili_bangumi_api(avid, cid, ep_id)
             api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
@@ -717,13 +713,17 @@ def download_playlist_by_url(self, url, **kwargs):
                                 self.download(**kwargs)
 
         elif sort == 'bangumi':
-            initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)')  # FIXME
-            initial_state = json.loads(initial_state_text)
-            epn, i = len(initial_state['epList']), 0
-            for ep in initial_state['epList']:
+            epId = re.search(r'"videoId":"ep(.*?)"', html_content).group(1)
+            eposide_content = get_content(url="https://api.bilibili.com/pgc/view/web/ep/list?ep_id=%s" % (epId),
+                                          headers=self.bilibili_headers(referer=self.url))
+            eposide_json = json.loads(eposide_content)["result"]["episodes"]
+            epn, i = len(eposide_json), 0
+            for ep in eposide_json:
                 i += 1; log.w('Extracting %s of %s videos ...' % (i, epn))
                 ep_id = ep['id']
                 epurl = 'https://www.bilibili.com/bangumi/play/ep%s/' % ep_id
+                kwargs['eposide_data'] = ep
+                ep['long_title']= ("%02d" % i) + ep['long_title']
                 self.__class__().download_by_url(epurl, **kwargs)
 
         elif sort == 'bangumi_md':