-
Notifications
You must be signed in to change notification settings - Fork 1
/
__init__.py
169 lines (122 loc) · 5.78 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
"""
Date: 29-08-2020
Created by Sameer Narkhede
Project : python_sample
Module : python_sample_scrape_do
"""
import traceback
import requests
class Scrape_do_Exception(Exception):
    """
    custom Scrape.do exception class

    Raised by the scrape.do client when the api token is missing, when an
    argument fails validation, or when the api answers with an error status.

    Derives from ``Exception`` (not ``BaseException``) so that ordinary
    ``except Exception`` boundaries catch it and it is not mixed up with
    interpreter-level signals such as ``KeyboardInterrupt`` / ``SystemExit``.
    """
class python_sample:
    """
    Python sample class for proxy rotating api's https://scrape.do

    Typical use: create an instance, register the account token with
    ``set_api_token`` and then call ``account_status`` or ``create_request_url``.
    """

    # Endpoint every scrape.do call is sent to.
    BASE_URL = "http://api.scrape.do"

    # Country codes accepted for the ``geo_code`` argument.
    GEO_CODES = ('us', 'gb', 'ca', 'tr', 'cn', 'ru', 'se', 'de', 'fr', 'es', 'br')

    # HTTP verbs accepted for the ``method`` argument.
    HTTP_METHODS = ("GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE")

    # Explanations for the error status codes this client knows about.
    _STATUS_ERRORS = {
        404: "Target url not found :: Pass valid URL",
        429: "You are sending too many concurrent request :: Please upgrade your "
             "plan or contact with sale.",
        401: "You have not credit :: Please upgrade your plan or contact with sale.",
        502: "Gateway Error :: Please retry and check response. If you live "
             "constantly, contact support@scrape.do",
    }

    def __init__(self):
        # No token until set_api_token() is called; every api call checks this.
        self.scrape_do_api_token = None

    def set_api_token(self, api_token=None):
        """
        set scrape.do api token you can find this token from https://scrape.do/dashboard this needs login.
        :param api_token: String API_TOKEN from https://scrape.do
        :return: None
        """
        self.scrape_do_api_token = api_token

    def account_status(self):
        """
        returns the statistics of your scrape.do account
        :return: Dictionary of statistics (the decoded JSON body of the ``/info`` endpoint)
        :raises Scrape_do_Exception: when no api token has been configured
        """
        if not self.scrape_do_api_token:
            raise Scrape_do_Exception("api-token is not configured")

        # Pass the token through ``params`` so requests URL-encodes it instead of
        # splicing it into the URL by hand.
        response = requests.get(self.BASE_URL + "/info",
                                params={'token': self.scrape_do_api_token})
        return response.json()

    def _build_params(self, url, headers, render, super_proxies, geo_code):
        """
        Assemble the query-string parameters for one scrape.do request.
        :param url: String target url to scrape
        :param headers: Dictionary of custom headers, or None / empty when there are none
        :param render: Boolean - request Javascript rendering
        :param super_proxies: Boolean - request Super Proxies
        :param geo_code: String country code from ``GEO_CODES``, or None
        :return: Dictionary of query parameters
        :raises Scrape_do_Exception: when geo_code is not one of ``GEO_CODES``
        """
        params = {'token': self.scrape_do_api_token}

        # Only announce custom headers when the caller actually supplied some.
        # (An identity test against a fresh ``{}`` literal is always true and
        # would switch this flag on for every request.)
        if headers:
            params['customHeaders'] = 'true'

        params['url'] = url

        if render:
            params['render'] = 'true'

        if super_proxies:
            params['super'] = 'true'

        if geo_code:
            if geo_code not in self.GEO_CODES:
                raise Scrape_do_Exception(
                    "Geo-Code is not valid. please provide geo-code in " + str(list(self.GEO_CODES)))
            # The scrape.do query parameter is spelled ``geoCode``.
            params['geoCode'] = geo_code

        return params

    def create_request_url(self, url, method="GET", payload=None, headers=None, render=False,
                           super_proxies=False, geo_code=None):
        """
        Best Rotating Proxy & Scraping API Alternative https://scrape.do/ api handler
        new request url
        :param url: String the url user needs to scrape. Ex. 'https://httpbin.org/get'
        :param method: String method for the url request. Ex. ``GET``, ``OPTIONS``, ``HEAD``, ``POST``, ``PUT``,
        ``PATCH``, or ``DELETE`` (matched case-insensitively)
        :param payload: (optional) Dictionary, list of tuples, bytes, or file-like object to send in the body of the
        request
        :param headers: (optional) Dictionary of HTTP Headers to send with the request
        :param render: (optional) Boolean - To use Javascript, all you need to do is setting render parameter to true
        ** Beware that you need a business plan to use this feature!
        :param super_proxies:(optional) Boolean - To use Super Proxies, all you need to do is setting super parameter
        to true
        ** Beware that you need a business plan to use this feature!
        :param geo_code: geocode in 'us', 'gb', 'ca', 'tr', 'cn', 'ru', 'se', 'de', 'fr', 'es', 'br' ex. us
        ** Beware that you need a Pro plan to use this feature!
        :return: response body of scrape.do api as utf-8 encoded bytes (status code 200 only)
        :raises Scrape_do_Exception: when the api token is not configured, when geo_code or method is not
        valid, or when the api answers with any status code other than 200
        """
        # check if there is token is configured
        if not self.scrape_do_api_token:
            raise Scrape_do_Exception("api-token is not configured")

        # Geo-code validation happens here, before the method check.
        params = self._build_params(url, headers, render, super_proxies, geo_code)

        # Accept lower/mixed case verbs; non-string values fall through to the
        # membership test below and are rejected there.
        if isinstance(method, str):
            method = method.upper()
        if method not in self.HTTP_METHODS:
            raise Scrape_do_Exception(
                "method is not valid. please provide method in " + str(list(self.HTTP_METHODS)))

        response = requests.request(
            method,
            self.BASE_URL,
            params=params,
            headers=headers or {},
            data=payload if payload is not None else {},
        )
        print("status_code:" + str(response.status_code))

        if response.status_code == 200:
            return response.text.encode('utf8')

        known_error = self._STATUS_ERRORS.get(response.status_code)
        if known_error is not None:
            raise Scrape_do_Exception(known_error)

        # Any other status used to fall through and return None silently;
        # surface it so callers do not mistake a failure for an empty result.
        raise Scrape_do_Exception("Unexpected response status code :: " + str(response.status_code))
if __name__ == '__main__':
    # Demo: query the account statistics, then scrape one page through scrape.do.
    API_TOKEN = "Your_API_TOKEN_FOR_scrape.do"

    # Errors the demo reports instead of crashing on. requests raises its own
    # hierarchy rooted at RequestException, which is NOT a subclass of the
    # builtin ConnectionError, so both have to be listed.
    HANDLED_ERRORS = (requests.exceptions.RequestException, ConnectionError, Scrape_do_Exception)

    # create an python-sample object
    sample = python_sample()

    # set the scrape.do api key
    # Put a real token in API_TOKEN and uncomment the next line; while it stays
    # commented out both calls below report "api-token is not configured".
    # sample.set_api_token(api_token=API_TOKEN)

    # Get Scrape.do account statistics
    try:
        resp = sample.account_status()
        print("Response Type " + str(type(resp)))
        print(resp)
    except HANDLED_ERRORS as e:
        print(str(e))
        print(traceback.format_exc())

    # Scrape a page with the default options
    try:
        resp = sample.create_request_url(url='https://docs.scrape.do/', method="GET", payload={}, headers={},
                                         render=False, super_proxies=False, geo_code=None)
        print(resp)
    except HANDLED_ERRORS as e:
        print(str(e))
        print(traceback.format_exc())