diff --git a/requirements.txt b/requirements.txt index 08c48bf816e7..d5da2ddb0768 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,4 @@ requests notion-client github-heatmap retrying -pendulum -beautifulsoup4 \ No newline at end of file +pendulum \ No newline at end of file diff --git a/scripts/__pycache__/config.cpython-311.pyc b/scripts/__pycache__/config.cpython-311.pyc index f83849fbaed6..c86661ea6f5d 100644 Binary files a/scripts/__pycache__/config.cpython-311.pyc and b/scripts/__pycache__/config.cpython-311.pyc differ diff --git a/scripts/__pycache__/notion_helper.cpython-311.pyc b/scripts/__pycache__/notion_helper.cpython-311.pyc index 877a389106de..aa3f1d03a6c0 100644 Binary files a/scripts/__pycache__/notion_helper.cpython-311.pyc and b/scripts/__pycache__/notion_helper.cpython-311.pyc differ diff --git a/scripts/__pycache__/utils.cpython-311.pyc b/scripts/__pycache__/utils.cpython-311.pyc index 22ae2c1ce5dc..30860e3d6292 100644 Binary files a/scripts/__pycache__/utils.cpython-311.pyc and b/scripts/__pycache__/utils.cpython-311.pyc differ diff --git a/scripts/__pycache__/weread_api.cpython-311.pyc b/scripts/__pycache__/weread_api.cpython-311.pyc index e6cbda111659..373c6cdff51a 100644 Binary files a/scripts/__pycache__/weread_api.cpython-311.pyc and b/scripts/__pycache__/weread_api.cpython-311.pyc differ diff --git a/scripts/book.py b/scripts/book.py index 248fba0b8891..c76143bc63a3 100644 --- a/scripts/book.py +++ b/scripts/book.py @@ -10,16 +10,17 @@ from weread_api import WeReadApi import utils from config import ( - book_properties_name_dict, book_properties_type_dict, ) -from bs4 import BeautifulSoup from retrying import retry TAG_ICON_URL = "https://www.notion.so/icons/tag_gray.svg" USER_ICON_URL = "https://www.notion.so/icons/user-circle-filled_gray.svg" BOOK_ICON_URL = "https://www.notion.so/icons/book_gray.svg" +rating = {"poor": "⭐️", "fair": "⭐️⭐️⭐️", "good": "⭐️⭐️⭐️⭐️⭐️"} + + @retry(stop_max_attempt_number=3, wait_fixed=5000) def get_douban_url(isbn): print(f"get_douban_url {isbn} ") @@ -42,11 +43,12 @@ def get_douban_url(isbn): return None return urls[0].get("url") + def insert_book_to_notion(books, index, bookId): """插入Book到Notion""" book = {} if bookId in archive_dict: - book["archive"] = archive_dict.get(bookId) + book["书架分类"] = archive_dict.get(bookId) if bookId in notion_books: book.update(notion_books.get(bookId)) bookInfo = weread_api.get_bookinfo(bookId) @@ -70,17 +72,24 @@ def insert_book_to_notion(books, index, bookId): douban_url = get_douban_url(isbn) if douban_url: book["douban_url"] = douban_url - book["cover"] = cover - book["readingProgress"] = ( + book["封面"] = cover + book["阅读进度"] = ( 100 if (book.get("markedStatus") == 4) else book.get("readingProgress", 0) ) / 100 - markedStatus = book.get("markedStatus") + markedStatus = book.get("markedStatus") status = "想读" - if(markedStatus==4): + if markedStatus == 4: status = "已读" - elif(book.get("readingTime",0)>=60): + elif book.get("readingTime", 0) >= 60: status = "在读" - book["status"] = status + book["阅读状态"] = status + book["阅读时长"] = book.get("readingTime") + book["阅读天数"] = book.get("totalReadDay") + book["评分"] = book.get("newRating") + if book.get("newRatingDetail") and book.get("newRatingDetail").get("myRating"): + book["我的评分"] = rating.get(book.get("newRatingDetail").get("myRating")) + elif status=="已读": + book["我的评分"] = "未评分" date = None if book.get("finishedDate"): date = book.get("finishedDate") @@ -88,33 +97,35 @@ def insert_book_to_notion(books, index, bookId): date = book.get("lastReadingDate") elif book.get("readingBookDate"): date = book.get("readingBookDate") - book["date"] = date + book["时间"] = date + book["开始阅读时间"] = book.get("beginReadingDate") + book["最后阅读时间"] = book.get("lastReadingDate") if bookId not in notion_books: - book["author"] = [ + book["书名"] = book.get("title") + book["BookId"] = book.get("bookId") + book["ISBN"] = book.get("isbn") + book["链接"] = utils.get_weread_url(bookId) + book["简介"] = book.get("intro") + book["作者"] = [ notion_helper.get_relation_id( x, notion_helper.author_database_id, USER_ICON_URL ) for x in book.get("author").split(" ") ] - book["url"] = utils.get_weread_url(bookId) if book.get("categories"): - book["categories"] = [ + book["分类"] = [ notion_helper.get_relation_id( x.get("title"), notion_helper.category_database_id, TAG_ICON_URL ) for x in book.get("categories") - ] - else: - book.pop("categories",None) - book.pop("author",None) - properties = utils.get_properties( - book, book_properties_name_dict, book_properties_type_dict - ) + ] + properties = utils.get_properties(book, book_properties_type_dict) if book.get("date"): notion_helper.get_date_relation( properties, pendulum.from_timestamp(book.get("date"), tz="Asia/Shanghai"), ) + print(f"正在插入《{book.get('title')}》,一共{len(books)}本,当前是第{index+1}本。") parent = {"database_id": notion_helper.book_database_id, "type": "database_id"} if bookId in notion_books: @@ -159,6 +170,10 @@ def insert_book_to_notion(books, index, bookId): and value.get("cover") and (not value.get("cover").endswith("/0.jpg")) and (not value.get("cover").endswith("parsecover")) + and ( + value.get("status") != "已读" + or (value.get("status") == "已读" and value.get("myRating")) + ) ): not_need_sync.append(key) notebooks = weread_api.get_notebooklist() diff --git a/scripts/config.py b/scripts/config.py index 2935af824a17..a9b340ca576f 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -9,29 +9,6 @@ TITLE = "title" SELECT = "select" -book_properties_name_dict = { - "title":"书名", - "bookId":"BookId", - "isbn":"ISBN", - "url":"链接", - "author":"作者", - "Sort":"Sort", - "newRating":"评分", - "cover":"封面", - "categories":"分类", - "status":"阅读状态", - "readingTime":"阅读时长", - "readingProgress":"阅读进度", - "totalReadDay":"阅读天数", - "date":"时间", - "beginReadingDate":"开始阅读时间", - "lastReadingDate":"最后阅读时间", - "intro":"简介", - "archive":"书架分类", - "douban_url":"豆瓣链接", - "neodb_url":"NeoDB链接", -} - book_properties_type_dict = { "书名":TITLE, "BookId":RICH_TEXT, @@ -51,6 +28,6 @@ "最后阅读时间":DATE, "简介":RICH_TEXT, "书架分类":SELECT, + "我的评分":SELECT, "豆瓣链接":URL, - "NeoDB链接":URL, } diff --git a/scripts/notion_helper.py b/scripts/notion_helper.py index 4af3f62a2f85..c6f057b1858c 100644 --- a/scripts/notion_helper.py +++ b/scripts/notion_helper.py @@ -30,91 +30,129 @@ class NotionHelper: database_name_dict = { - "BOOK_DATABASE_NAME":"书架", - "REVIEW_DATABASE_NAME":"笔记", - "BOOKMARK_DATABASE_NAME":"划线", - "DAY_DATABASE_NAME":"日", - "WEEK_DATABASE_NAME":"周", - "MONTH_DATABASE_NAME":"月", - "YEAR_DATABASE_NAME":"年", - "CATEGORY_DATABASE_NAME":"分类", - "AUTHOR_DATABASE_NAME":"作者", - "CHAPTER_DATABASE_NAME":"章节", + "BOOK_DATABASE_NAME": "书架", + "REVIEW_DATABASE_NAME": "笔记", + "BOOKMARK_DATABASE_NAME": "划线", + "DAY_DATABASE_NAME": "日", + "WEEK_DATABASE_NAME": "周", + "MONTH_DATABASE_NAME": "月", + "YEAR_DATABASE_NAME": "年", + "CATEGORY_DATABASE_NAME": "分类", + "AUTHOR_DATABASE_NAME": "作者", + "CHAPTER_DATABASE_NAME": "章节", } database_id_dict = {} image_dict = {} def __init__(self): self.client = Client(auth=os.getenv("NOTION_TOKEN"), log_level=logging.ERROR) - self.__cache={} - self.search_database(self.extract_page_id(os.getenv("NOTION_PAGE"))) + self.__cache = {} + self.page_id = self.extract_page_id(os.getenv("NOTION_PAGE")) + self.search_database(self.page_id) for key in self.database_name_dict.keys(): - if(os.getenv(key)!=None and os.getenv(key)!=""): + if os.getenv(key) != None and os.getenv(key) != "": self.database_name_dict[key] = os.getenv(key) - self.book_database_id = self.database_id_dict.get(self.database_name_dict.get("BOOK_DATABASE_NAME")) - self.review_database_id = self.database_id_dict.get(self.database_name_dict.get("REVIEW_DATABASE_NAME")) - self.bookmark_database_id = self.database_id_dict.get(self.database_name_dict.get("BOOKMARK_DATABASE_NAME")) - self.day_database_id = self.database_id_dict.get(self.database_name_dict.get("DAY_DATABASE_NAME")) - self.week_database_id = self.database_id_dict.get(self.database_name_dict.get("WEEK_DATABASE_NAME")) - self.month_database_id = self.database_id_dict.get(self.database_name_dict.get("MONTH_DATABASE_NAME")) - self.year_database_id = self.database_id_dict.get(self.database_name_dict.get("YEAR_DATABASE_NAME")) - self.category_database_id = self.database_id_dict.get(self.database_name_dict.get("CATEGORY_DATABASE_NAME")) - self.author_database_id = self.database_id_dict.get(self.database_name_dict.get("AUTHOR_DATABASE_NAME")) - self.chapter_database_id = self.database_id_dict.get(self.database_name_dict.get("CHAPTER_DATABASE_NAME")) + self.book_database_id = self.database_id_dict.get( + self.database_name_dict.get("BOOK_DATABASE_NAME") + ) + self.review_database_id = self.database_id_dict.get( + self.database_name_dict.get("REVIEW_DATABASE_NAME") + ) + self.bookmark_database_id = self.database_id_dict.get( + self.database_name_dict.get("BOOKMARK_DATABASE_NAME") + ) + self.day_database_id = self.database_id_dict.get( + self.database_name_dict.get("DAY_DATABASE_NAME") + ) + self.week_database_id = self.database_id_dict.get( + self.database_name_dict.get("WEEK_DATABASE_NAME") + ) + self.month_database_id = self.database_id_dict.get( + self.database_name_dict.get("MONTH_DATABASE_NAME") + ) + self.year_database_id = self.database_id_dict.get( + self.database_name_dict.get("YEAR_DATABASE_NAME") + ) + self.category_database_id = self.database_id_dict.get( + self.database_name_dict.get("CATEGORY_DATABASE_NAME") + ) + self.author_database_id = self.database_id_dict.get( + self.database_name_dict.get("AUTHOR_DATABASE_NAME") + ) + self.chapter_database_id = self.database_id_dict.get( + self.database_name_dict.get("CHAPTER_DATABASE_NAME") + ) self.update_book_database() - def extract_page_id(self,notion_url): + def extract_page_id(self, notion_url): # 正则表达式匹配 32 个字符的 Notion page_id - match = re.search(r"([a-f0-9]{32}|[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})", notion_url) + match = re.search( + r"([a-f0-9]{32}|[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})", + notion_url, + ) if match: return match.group(0) else: raise Exception(f"获取NotionID失败,请检查输入的Url是否正确") - def search_database(self,block_id): + + + def search_database(self, block_id): children = self.client.blocks.children.list(block_id=block_id)["results"] # 遍历子块 for child in children: # 检查子块的类型 - + if child["type"] == "child_database": - self.database_id_dict[child.get('child_database').get('title')] = child.get("id") + self.database_id_dict[ + child.get("child_database").get("title") + ] = child.get("id") elif child["type"] == "image": - self.image_dict["url"] = child.get('image').get('external').get('url') - self.image_dict["id"] = child.get('id') + self.image_dict["url"] = child.get("image").get("external").get("url") + self.image_dict["id"] = child.get("id") # 如果子块有子块,递归调用函数 if "has_children" in child and child["has_children"]: self.search_database(child["id"]) def update_book_database(self): """更新数据库""" - response = self.client.databases.retrieve( - database_id=self.book_database_id - ) + response = self.client.databases.retrieve(database_id=self.book_database_id) id = response.get("id") properties = response.get("properties") update_properties = {} - if properties.get("阅读时长") is None or properties.get("阅读时长").get("type") != "number": + if ( + properties.get("阅读时长") is None + or properties.get("阅读时长").get("type") != "number" + ): update_properties["阅读时长"] = {"number": {}} - if properties.get("书架分类") is None or properties.get("书架分类").get("type") != "select": + if ( + properties.get("书架分类") is None + or properties.get("书架分类").get("type") != "select" + ): update_properties["书架分类"] = {"select": {}} - if properties.get("豆瓣链接") is None or properties.get("豆瓣链接").get("type") != "url": + if ( + properties.get("豆瓣链接") is None + or properties.get("豆瓣链接").get("type") != "url" + ): update_properties["豆瓣链接"] = {"url": {}} - """NeoDB先不添加了,现在受众还不光,可能有的小伙伴不知道是干什么的""" + if ( + properties.get("我的评分") is None + or properties.get("我的评分").get("type") != "select" + ): + update_properties["我的评分"] = {"select": {}} + if ( + properties.get("豆瓣短评") is None + or properties.get("豆瓣短评").get("type") != "rich_text" + ): + update_properties["豆瓣短评"] = {"rich_text": {}} + """NeoDB先不添加了,现在受众还不广,可能有的小伙伴不知道是干什么的""" # if properties.get("NeoDB链接") is None or properties.get("NeoDB链接").get("type") != "url": # update_properties["NeoDB链接"] = {"url": {}} if len(update_properties) > 0: - self.client.databases.update( - database_id=id, properties=update_properties - ) + self.client.databases.update(database_id=id, properties=update_properties) - def update_image_block_link(self,block_id, new_image_url): + def update_image_block_link(self, block_id, new_image_url): # 更新 image block 的链接 self.client.blocks.update( - block_id=block_id, - image={ - "external": { - "url": new_image_url - } - } + block_id=block_id, image={"external": {"url": new_image_url}} ) def get_week_relation_id(self, date): @@ -145,7 +183,7 @@ def get_year_relation_id(self, date): def get_day_relation_id(self, date): new_date = date.replace(hour=0, minute=0, second=0, microsecond=0) - timestamp = (new_date-timedelta(hours=8)).timestamp() + timestamp = (new_date - timedelta(hours=8)).timestamp() day = new_date.strftime("%Y年%m月%d日") properties = { "日期": get_date(format_date(date)), @@ -231,7 +269,7 @@ def insert_review(self, id, review): if "createTime" in review: create_time = timestamp_to_date(int(review.get("createTime"))) properties["Date"] = get_date(create_time.strftime("%Y-%m-%d %H:%M:%S")) - self.get_date_relation(properties,create_time) + self.get_date_relation(properties, create_time) parent = {"database_id": self.review_database_id, "type": "database_id"} self.create_page(parent, properties, icon) @@ -253,9 +291,7 @@ def insert_chapter(self, id, chapter): @retry(stop_max_attempt_number=3, wait_fixed=5000) def update_book_page(self, page_id, properties): - return self.client.pages.update( - page_id=page_id, properties=properties - ) + return self.client.pages.update(page_id=page_id, properties=properties) @retry(stop_max_attempt_number=3, wait_fixed=5000) def update_page(self, page_id, properties, icon): @@ -290,7 +326,7 @@ def append_blocks_after(self, block_id, children, after): @retry(stop_max_attempt_number=3, wait_fixed=5000) def delete_block(self, block_id): return self.client.blocks.delete(block_id=block_id) - + @retry(stop_max_attempt_number=3, wait_fixed=5000) def get_all_book(self): """从Notion中获取所有的书籍""" @@ -300,17 +336,19 @@ def get_all_book(self): bookId = get_property_value(result.get("properties").get("BookId")) books_dict[bookId] = { "pageId": result.get("id"), - "readingTime": get_property_value(result.get("properties").get("阅读时长")) , - "category": get_property_value(result.get("properties").get("书架分类")) , - "Sort": get_property_value(result.get("properties").get("Sort")) , - "douban_url": get_property_value(result.get("properties").get("豆瓣链接")) , - "cover": get_property_value(result.get("properties").get("封面")) , + "readingTime": get_property_value(result.get("properties").get("阅读时长")), + "category": get_property_value(result.get("properties").get("书架分类")), + "Sort": get_property_value(result.get("properties").get("Sort")), + "douban_url": get_property_value(result.get("properties").get("豆瓣链接")), + "cover": get_property_value(result.get("properties").get("封面")), + "myRating": get_property_value(result.get("properties").get("我的评分")), + "comment": get_property_value(result.get("properties").get("豆瓣短评")), + "status": get_property_value(result.get("properties").get("阅读状态")), } return books_dict - @retry(stop_max_attempt_number=3, wait_fixed=5000) - def query_all_by_book(self,database_id,filter): + def query_all_by_book(self, database_id, filter): results = [] has_more = True start_cursor = None @@ -325,6 +363,7 @@ def query_all_by_book(self,database_id,filter): has_more = response.get("has_more") results.extend(response.get("results")) return results + @retry(stop_max_attempt_number=3, wait_fixed=5000) def query_all(self, database_id): """获取database中所有的数据""" @@ -341,8 +380,8 @@ def query_all(self, database_id): has_more = response.get("has_more") results.extend(response.get("results")) return results - - def get_date_relation(self,properties,date): + + def get_date_relation(self, properties, date): properties["年"] = get_relation( [ self.get_year_relation_id(date), diff --git a/scripts/utils.py b/scripts/utils.py index 39a5e4a2b4f9..66461d8fad6f 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -217,49 +217,48 @@ def get_first_and_last_day_of_week(date): return first_day_of_week, last_day_of_week -def get_properties(dict1, dict2, dict3): +def get_properties(dict1, dict2): properties = {} - for key, value in dict2.items(): - property_type = dict3.get(value) - property_value = dict1.get(key) - if property_value == None: + for key, value in dict1.items(): + type = dict2.get(key) + if value == None: continue property = None - if property_type == TITLE: + if type == TITLE: property = { "title": [ - {"type": "text", "text": {"content": property_value[:MAX_LENGTH]}} + {"type": "text", "text": {"content": value[:MAX_LENGTH]}} ] } - elif property_type == RICH_TEXT: + elif type == RICH_TEXT: property = { "rich_text": [ - {"type": "text", "text": {"content": property_value[:MAX_LENGTH]}} + {"type": "text", "text": {"content": value[:MAX_LENGTH]}} ] } - elif property_type == NUMBER: - property = {"number": property_value} - elif property_type == STATUS: - property = {"status": {"name": property_value}} - elif property_type == FILES: - property = {"files": [{"type": "external", "name": "Cover", "external": {"url": property_value}}]} - elif property_type == DATE: + elif type == NUMBER: + property = {"number": value} + elif type == STATUS: + property = {"status": {"name": value}} + elif type == FILES: + property = {"files": [{"type": "external", "name": "Cover", "external": {"url": value}}]} + elif type == DATE: property = { "date": { "start": pendulum.from_timestamp( - property_value, tz="Asia/Shanghai" + value, tz="Asia/Shanghai" ).to_datetime_string(), "time_zone": "Asia/Shanghai", } } - elif property_type==URL: - property = {"url": property_value} - elif property_type==SELECT: - property = {"select": {"name": property_value}} - elif property_type == RELATION: - property = {"relation": [{"id": id} for id in property_value]} + elif type==URL: + property = {"url": value} + elif type==SELECT: + property = {"select": {"name": value}} + elif type == RELATION: + property = {"relation": [{"id": id} for id in value]} if property: - properties[value] = property + properties[key] = property return properties