-
Notifications
You must be signed in to change notification settings - Fork 2
/
2.to_json.py
28 lines (24 loc) · 854 Bytes
/
2.to_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import json
import sys
from loguru import logger
from bs4 import BeautifulSoup
import datetime
import time
def to_json(file_path):
    """Convert a JSON dump of HTML records into a plain-text sample file.

    Loads the JSON array stored at ``file_path``, sorts the records in place
    by their ``created_time`` value, strips the HTML markup from each
    record's ``content`` field and writes the reduced records to
    ``<file_path>_sample.json`` (pretty-printed, non-ASCII kept verbatim).

    Args:
        file_path: Path of the input JSON file. The file must hold a list of
            objects, each of which has a ``created_time`` key.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If the input is not valid JSON.
        KeyError: If a record has no ``created_time`` key.
    """
    logger.info(f"read {file_path}")
    # Open read-only (the input is never modified) and pin UTF-8 so the result
    # does not depend on the platform's locale encoding.
    with open(file_path, "r", encoding="utf-8") as fp:
        json_data = json.load(fp)

    json_data.sort(key=lambda x: x["created_time"])
    results = [
        {
            "id": x.get("id"),
            # A record without content yields an empty string instead of
            # crashing the HTML parser on None.
            "content_text": BeautifulSoup(
                x.get("content") or "", "html.parser"
            ).text.replace("\n", ""),
            "created_time": x.get("created_time"),
        }
        for x in json_data
    ]

    # ensure_ascii=False emits raw non-ASCII characters, so the output file
    # must be opened with an encoding that can represent them.
    with open(f"{file_path}_sample.json", "w", encoding="utf-8") as wf:
        wf.write(json.dumps(results, ensure_ascii=False, indent=4))
    logger.info(f"{file_path} to json successfully.")
if __name__ == "__main__":
    # The script needs the path of the JSON file to convert as its first
    # argument; print a usage line instead of an IndexError traceback when
    # it is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <file_path>")
    to_json(file_path=sys.argv[1])