# settings.py
# Core Scrapy configuration for this crawler.
# Reference: https://docs.scrapy.org/en/latest/topics/settings.html

# Logging, cookie handling, throttling and feed-export encoding.
FEED_EXPORT_ENCODING = "utf-8"
LOG_LEVEL = "INFO"
COOKIES_ENABLED = False
DOWNLOAD_DELAY = 2.5
CONCURRENT_REQUESTS = 5

# Item pipeline table: the MySQL pipeline is registered with order value 301.
ITEM_PIPELINES = {"scrapy_mysql_pipeline.MySQLPipeline": 301}

# Downloader middleware table. Each scrapy_fake_useragent middleware is paired
# with its built-in Scrapy counterpart mapped to None (Scrapy's documented way
# of disabling a middleware), so the replacement takes over.
DOWNLOADER_MIDDLEWARES = {
    # User-agent handling: random user agent instead of the stock middleware.
    "scrapy_fake_useragent.middleware.RandomUserAgentMiddleware": 400,
    "scrapy.downloadermiddlewares.useragent.UserAgentMiddleware": None,
    # Retry handling: user-agent-aware retry instead of the stock middleware.
    "scrapy_fake_useragent.middleware.RetryUserAgentMiddleware": 401,
    "scrapy.downloadermiddlewares.retry.RetryMiddleware": None,
}
# Settings consumed by the MySQL item pipeline.
# Reference: https://github.com/IaroslavR/scrapy-mysql-pipeline

# Where the database server lives.
MYSQL_HOST = "localhost"
MYSQL_PORT = 3306

# Credentials.
# NOTE(review): the password is hard-coded in this file; consider supplying it
# from outside version control before deploying anywhere shared.
MYSQL_USER = "crawler"
MYSQL_PASSWORD = "randompass"

# Target database and table, with upsert turned on.
MYSQL_DB = "crawl"
MYSQL_TABLE = "company"
MYSQL_UPSERT = True
# Connection

# User-Agents https://github.com/alecxe/scrapy-fake-useragent
RANDOM_UA_PER_PROXY = True

# Proxy https://github.com/TeamHG-Memex/scrapy-rotating-proxies
DOWNLOAD_TIMEOUT = 10
# Proxies to rotate through; empty here, so the proxy middlewares below are
# not registered.
ROTATING_PROXY_LIST = []

# Register the rotating-proxy middlewares only when at least one proxy is
# configured. Their priorities (310/320) are numerically below the 400/401
# used for the user-agent middlewares in DOWNLOADER_MIDDLEWARES.
# NOTE(review): presumably this ordering lets the proxy be assigned before the
# per-proxy user agent is chosen -- confirm against the scrapy-fake-useragent
# documentation for RANDOM_UA_PER_PROXY.
if ROTATING_PROXY_LIST:
    DOWNLOADER_MIDDLEWARES.update(
        {
            'rotating_proxies.middlewares.RotatingProxyMiddleware': 310,
            'rotating_proxies.middlewares.BanDetectionMiddleware': 320,
        }
    )