# docker-compose_selenium_nutch_solr.yaml
# To execute this docker-compose yml file use `docker-compose -f <file_name> up`
# Add the `-d` flag at the end for detached execution
#
# Stack defined here: a Selenium Grid hub with one Firefox node, Apache Solr,
# and Apache Nutch. Every service sets mem_limit to 2000000000 (an integer
# mem_limit is a byte count) and restart: always.
version: "2"
services:
  # selenium hub
  selenium-hub:
    image: selenium/hub:3.141.5-astatine
    container_name: selenium-hub
    ports:
      - "4444:4444"
    environment:
      - GRID_BROWSER_TIMEOUT=90
      - GRID_TIMEOUT=90
      - GRID_MAX_SESSION=100
    mem_limit: 2000000000
    restart: always

  # selenium node (chrome) - kept for reference, currently commented out
  # chrome:
  #   image: selenium/node-chrome:3.141.5-astatine
  #   depends_on:
  #     - selenium-hub
  #   environment:
  #     - HUB_HOST=selenium-hub
  #     - HUB_PORT=4444
  #     - START_XVFB=false

  # selenium node
  firefox:
    image: selenium/node-firefox:3.141.5-astatine
    depends_on:
      - selenium-hub
    volumes:
      # The host's shared-memory directory is mounted into the container.
      - /dev/shm:/dev/shm
    environment:
      # HUB_HOST / HUB_PORT name the selenium-hub service and its port 4444.
      - HUB_HOST=selenium-hub
      - HUB_PORT=4444
      # NOTE(review): presumably disables the virtual X server so the browser
      # must run headless - confirm against the docker-selenium docs.
      - START_XVFB=false
    mem_limit: 2000000000
    restart: always

  # apache solr
  solr:
    image: "solr:7.3.1"
    container_name: "solr"
    ports:
      - "8983:8983"
    # Overrides the image entrypoint; runs `solr-precreate mycore` through
    # docker-entrypoint.sh.
    entrypoint:
      - docker-entrypoint.sh
      - solr-precreate
      - mycore
    # NOTE(review): solr is ordered after selenium-hub - confirm this
    # dependency is intentional.
    depends_on:
      - selenium-hub
    mem_limit: 2000000000
    restart: always

  # apache nutch
  nutch:
    # Image is built from the build context in this directory.
    build: .
    container_name: "nutch"
    ports:
      - "5555:5555"
      - "8081:8081"
      - "8080:8080"
    depends_on:
      - solr
    environment:
      - JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
    # Local configuration files and the helper script are bind-mounted from
    # this directory into /root inside the container.
    volumes:
      - ./index-writers.xml:/root/conf/index-writers.xml
      - ./nutch-site.xml:/root/conf/nutch-site.xml
      - ./regex-urlfilter.txt:/root/conf/regex-urlfilter.txt
      - ./agents.txt:/root/conf/agents.txt
      - ./batch_urls:/root/conf/batch_urls
      - ./copy_conf_files.sh:/root/copy_conf_files.sh
    stdin_open: true
    mem_limit: 2000000000
    restart: always