-
Notifications
You must be signed in to change notification settings - Fork 0
/
collect_comments.py
67 lines (57 loc) · 3.64 KB
/
collect_comments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from googleapiclient.discovery import build
import pandas as pd
from tqdm import tqdm
"""Collecting comments using Google API.
Let's take Varlamov's video as a basis:
'Почему Россия — не Израиль в мире прививок | Как теория заговора замедляет распространение Спутник V'
link for video: https://www.youtube.com/watch?v=fvUU943dhFY
"""
API_KEY = "xxxxx" # input your API key (Follow instructions for getting key https://developers.google.com/youtube/v3/getting-started?hl=ru)
VIDEO_ID = "fvUU943dhFY" # input video id (last symbols after '=' in video link)
comments_list = []
class Collect_Comments:
def __init__(self, api_key, video_id):
self.api_key = api_key
self.video_id = video_id
self.connection = build('youtube', 'v3', developerKey=self.api_key)
def collect_comments(self, resp):
for item in tqdm(resp['items']):
name = item["snippet"]['topLevelComment']["snippet"]["authorDisplayName"]
comment = item["snippet"]['topLevelComment']["snippet"]["textDisplay"]
published_at = item["snippet"]['topLevelComment']["snippet"]['publishedAt']
likes = item["snippet"]['topLevelComment']["snippet"]['likeCount']
replies = item["snippet"]['totalReplyCount']
if [name, comment, published_at, likes, replies] not in comments_list:
comments_list.append([name, comment, published_at, likes, replies])
totalReplyCount = item["snippet"]['totalReplyCount']
if totalReplyCount > 0:
parent = item["snippet"]['topLevelComment']["id"]
response_2 = self.connection.comments().list(part='snippet', maxResults='100', parentId=parent,
textFormat="plainText").execute()
for i in response_2["items"]:
name = i["snippet"]["authorDisplayName"]
comment = i["snippet"]["textDisplay"]
published_at = i["snippet"]['publishedAt']
likes = i["snippet"]['likeCount']
replies = ""
if [name, comment, published_at, likes, replies] not in comments_list:
comments_list.append([name, comment, published_at, likes, replies])
def save_to_csv(self, path_to_save_file):
df = pd.DataFrame({'Name': [i[0] for i in comments_list], 'Comment': [i[1] for i in comments_list],
'Time': [i[2] for i in comments_list],
'Likes': [i[3] for i in comments_list], 'Reply Count': [i[4] for i in comments_list]})
df.to_csv(path_to_save_file, index=False)
return "Successful!2 Check the CSV file that you have just created."
def get_comments(self, path_to_save_file):
response = self.connection.commentThreads().list(part='snippet, replies', videoId=self.video_id).execute()
self.collect_comments(response)
while 'nextPageToken' in response:
response = self.connection.commentThreads().list(part='snippet,replies', videoId=self.video_id,
pageToken=response["nextPageToken"]).execute()
self.collect_comments(response)
self.save_to_csv(path_to_save_file)
print('Count of getting comments : ', len(comments_list))
if __name__ == "__main__":
path = '' # path to save new csv file
comments = Collect_Comments(API_KEY, VIDEO_ID) # create an instance of the class
comments.get_comments(path) # collect of comments