-
Notifications
You must be signed in to change notification settings - Fork 6
/
main.py
144 lines (102 loc) · 3.86 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import requests
import json
import getpass
# Prompt for the account credentials and the profile to scrape. getpass is
# used for all three prompts, including the target ID.
username = getpass.getpass("ID: ")
password = getpass.getpass("pw: ")
target_id = getpass.getpass("target ID: ")

origin_url = 'https://www.instagram.com'
login_url = origin_url + '/accounts/login/ajax/'
user_agent = 'Chrome/59.0.3071.115'

# Log in to Instagram; the shared session keeps the cookies and headers
# for every later request made by this script.
session = requests.Session()
session.headers = {'user-agent': user_agent}
session.headers.update({'Referer': origin_url})

# Fetch the landing page first so that its csrftoken cookie can be sent
# back in the X-CSRFToken header of the login POST.
try:
    req = session.get(origin_url)
    req.raise_for_status()
except requests.RequestException as exc:
    print('problem occur: %s' % (exc))
    # Non-zero status so a calling shell can tell the run failed.
    raise SystemExit(1)

csrf_token = req.cookies.get('csrftoken')
if csrf_token is None:
    print('problem occur: %s' % ('no csrftoken cookie received from ' + origin_url))
    raise SystemExit(1)
session.headers.update({'X-CSRFToken': csrf_token})

login_data = {'username': username, 'password': password}
try:
    login = session.post(login_url, data=login_data, allow_redirects=True)
    login.raise_for_status()
except requests.RequestException as exc:
    print('problem occur: %s' % (exc))
    raise SystemExit(1)

# Use the token from the login response when it sets one; otherwise keep
# the token obtained from the landing page.
csrf_token = login.cookies.get('csrftoken', csrf_token)
session.headers.update({'X-CSRFToken': csrf_token})
cookies = login.cookies
# NOTE(review): the decoded login response is stored but never inspected, so
# a rejected login is not detected here -- confirm the response schema and
# add a check if it carries a success flag.
login_text = json.loads(login.text)
def _sidecar_pic_urls(post_url):
    """Return the picture URLs of the multi-picture post at *post_url*.

    The post's JSON is requested through the module-level ``session`` and
    every ``display_url`` found under ``edge_sidecar_to_children`` is
    printed and collected. If the response has an HTTP error status, the
    error is printed and the program exits with status 1.
    """
    response = session.get(post_url)
    try:
        response.raise_for_status()
    except Exception as exc:
        print('problem occur: %s' % (exc))
        # Non-zero status so a calling shell can tell the run failed.
        raise SystemExit(1)
    post_data = response.json()
    edges = post_data['graphql']['shortcode_media']['edge_sidecar_to_children']['edges']
    urls = []
    for edge in edges:
        pic_url = edge['node']['display_url']
        print(pic_url)
        urls.append(pic_url)
    return urls


def handle_12_posts(data, origin_url, target_id):
    """Collect the picture URLs of the posts on one page of a profile.

    For a post with a single picture ("GraphImage") the picture URL is read
    directly from the post. For a post with multiple pictures
    ("GraphSidecar") the post's own URL is requested and the URLs of all of
    its pictures are collected. Posts of any other type are skipped. Each
    URL is printed as it is found.

    Args:
        data: Decoded profile JSON; the posts are read from
            ``data['user']['media']['nodes']``.
        origin_url: Site root used to build the URL of a multi-picture post.
        target_id: Unused; kept so existing callers keep working.

    Returns:
        A list with the picture URLs, in the order the posts appear.
    """
    pics_url_list = []
    for post in data['user']['media']['nodes']:
        typename = str(post['__typename'])
        if typename == "GraphImage":
            pic_url = str(post['display_src'])
            print(pic_url)
            pics_url_list.append(pic_url)
        elif typename == "GraphSidecar":
            post_url = origin_url + '/p/' + str(post['code']) + '/?__a=1'
            pics_url_list.extend(_sidecar_pic_urls(post_url))
    return pics_url_list
def get_end_cursor(data):
    """Return the ``end_cursor`` of a profile page, converted with ``str``.

    The value is read from ``data['user']['media']['page_info']``.
    """
    page_info = data['user']['media']['page_info']
    cursor = page_info['end_cursor']
    return str(cursor)
def refresh_url(origin_url, target_id, end_cursor):
    """Build the profile URL that requests the page after *end_cursor*.

    Args:
        origin_url: Site root, e.g. ``'https://www.instagram.com'``.
        target_id: Profile name; percent-encoded as a single path segment.
        end_cursor: Cursor string placed in the ``max_id`` query parameter;
            percent-encoded so characters such as ``&`` or ``=`` cannot
            alter the query string.

    Returns:
        The URL ``<origin_url>/<target_id>/?__a=1&max_id=<end_cursor>``.
    """
    # Imported locally so this function does not depend on the file's
    # top-level import list.
    from urllib.parse import quote

    profile = quote(target_id, safe='')
    cursor = quote(end_cursor, safe='')
    return str(origin_url + '/' + profile + '/?__a=1&max_id=' + cursor)
def _fetch_json(url):
    """GET *url* through the shared session and return the decoded JSON.

    If the response has an HTTP error status, the error is printed and the
    program exits with status 1.
    """
    response = session.get(url)
    try:
        response.raise_for_status()
    except Exception as exc:
        print('problem occur: %s' % (exc))
        # Non-zero status so a calling shell can tell the run failed.
        raise SystemExit(1)
    return response.json()


def _download(url):
    """Save the resource at *url* to a file in the current directory.

    The file name is the text after the last '/' of the URL. If the
    response has an HTTP error status, the error is printed and the program
    exits with status 1.
    """
    response = session.get(url, stream=True)
    try:
        response.raise_for_status()
    except Exception as exc:
        print('problem occur: %s' % (exc))
        raise SystemExit(1)
    # Open the file only after the request succeeded, so a failed download
    # does not leave an empty file behind.
    with open(url.split('/').pop(), 'wb') as handle:
        for block in response.iter_content(15000):
            if not block:
                break
            handle.write(block)


def main():
    """Collect every picture URL of the target profile and download them.

    Uses the module-level ``session``, ``origin_url`` and ``target_id``.
    """
    pics_url_list = []
    data = _fetch_json(origin_url + '/' + target_id + '/?__a=1')

    # Only 12 posts are received per profile request. The end_cursor of the
    # current page is used to request the next 12 posts, and this repeats
    # until a page holds fewer than 12 posts. The size check runs before the
    # next request, so a profile whose first page is already short is not
    # queried a second time.
    while True:
        pics_url_list.extend(handle_12_posts(data, origin_url, target_id))
        if len(data['user']['media']['nodes']) < 12:
            break
        end_cursor = get_end_cursor(data)
        data = _fetch_json(refresh_url(origin_url, target_id, end_cursor))

    # Download all pictures.
    for url in pics_url_list:
        print('url = ' + url)
        _download(url)


if __name__ == '__main__':
    main()