Skip to content

Commit

Permalink
Add function to save tweets in json format
Browse files Browse the repository at this point in the history
  • Loading branch information
RafaelLeeImg committed Nov 29, 2022
1 parent b9429b8 commit 1248fa3
Showing 1 changed file with 24 additions and 0 deletions.
24 changes: 24 additions & 0 deletions src/you_get/extractors/twitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
api_url = 'https://api.twitter.com/2/timeline/conversation/%s.json?tweet_mode=extended' % item_id
api_content = get_content(api_url, headers={'authorization': authorization, 'x-guest-token': guest_token})
info = json.loads(api_content)
twitter_write_json(info, screen_name, item_id)
if item_id not in info['globalObjects']['tweets']:
# something wrong here
#log.wtf('[Failed] ' + info['timeline']['instructions'][0]['addEntries']['entries'][0]['content']['item']['content']['tombstone']['tombstoneInfo']['richText']['text'], exit_code=None)
Expand Down Expand Up @@ -103,6 +104,7 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
api_url = 'https://api.twitter.com/1.1/statuses/show/%s.json?tweet_mode=extended' % item_id
api_content = get_content(api_url, headers={'authorization': authorization, 'x-guest-token': guest_token})
info = json.loads(api_content)
twitter_write_json(info, screen_name, item_id)
media = info['extended_entities']['media']

for medium in media:
Expand All @@ -128,6 +130,28 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
if not info_only:
download_urls(urls, title, ext, size, output_dir, merge=merge)

def twitter_write_json(info, screen_name, item_id):
    """Save a simplified, human-readable JSON summary of the tweets in *info*.

    Only payloads that carry a top-level ``globalObjects`` key are handled;
    for any other payload nothing is written. For every tweet under
    ``info['globalObjects']['tweets']`` the summary keeps ``created_at``,
    ``user_id_str`` and ``full_text``, plus the author's ``name`` looked up
    in ``info['globalObjects']['users']``.

    The summary is written as UTF-8 to ``<screen_name>_<item_id>.json``,
    a path relative to the current working directory. An existing file of
    that name is overwritten.

    Args:
        info: Decoded API response (a dict).
        screen_name: String used as the first part of the output file name.
        item_id: String used as the second part of the output file name.

    Raises:
        KeyError: If a tweet lacks one of the copied fields, or its author
            is missing from the ``users`` mapping.
    """
    # To keep the raw API response as well, dump ``info`` itself:
    #   with open(screen_name + '_' + item_id + "_tweet.json", 'w', encoding='utf-8') as fw:
    #       fw.write(json.dumps(info, indent=" ", ensure_ascii=False))
    if 'globalObjects' not in info:
        return

    global_objects = info['globalObjects']
    users = global_objects['users']
    tweets_simplified = {
        tweet_id: {
            'created_at': tweet['created_at'],
            'user_id_str': tweet['user_id_str'],
            'full_text': tweet['full_text'],
            'name': users[tweet['user_id_str']]['name'],
        }
        for tweet_id, tweet in global_objects['tweets'].items()
    }

    tweet_string = json.dumps(tweets_simplified, indent=" ", ensure_ascii=False)
    # ensure_ascii=False leaves non-ASCII characters unescaped, so the file
    # must be opened as UTF-8 explicitly instead of relying on the locale's
    # default encoding (which would fail or corrupt the JSON on e.g. cp1252).
    with open(screen_name + '_' + item_id + ".json", 'w', encoding='utf-8') as fw:
        fw.write(tweet_string)


# Module-level metadata for this extractor.
# NOTE(review): presumably read by the code that dispatches to extractor
# modules -- confirm against the loader.
site_info = "Twitter.com"
# Expose twitter_download under the generic name `download`.
download = twitter_download
Expand Down

0 comments on commit 1248fa3

Please sign in to comment.