-
Notifications
You must be signed in to change notification settings - Fork 0
/
run-system.yml
110 lines (100 loc) · 4.33 KB
/
run-system.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# start various applications in required configurations for normal operation
# Play: make sure the home, data and spider directories exist with the
# owner, group and mode set below. Runs with privilege escalation on pi8.
- name: ensure permissions for run are correct
  hosts: pi8
  become: true
  tasks:
    # Capture the connection user once so the paths and owner below reuse it.
    - name: remember the connection user
      set_fact:
        user: "{{ ansible_user }}"

    # Directories owned by the connection user, group lowpriv, mode 0750.
    - name: ensure user-owned directories exist
      file:
        path: "{{ item }}"
        state: directory
        owner: "{{ user }}"
        group: lowpriv
        mode: '0750'
      loop:
        - "/home/{{ user }}"
        - "/home/{{ user }}/data"
        - /data

    # Same group and mode, but owned by the spider account.
    - name: ensure spider-owned directories exist
      file:
        path: "{{ item }}"
        state: directory
        owner: spider
        group: lowpriv
        mode: '0750'
      loop:
        - /data/kafkaspider16
# Play: start the kafkaspider crawler as user spider on pi8, only when
# SPIDER is 1.
- name: run kafkaspider to crawl AU web
  hosts: pi8
  gather_facts: false
  become: true
  become_user: spider
  tasks:
    - name: show the spider working directory and the source root
      debug:
        msg: "{{ SPIDER_ROOT }} {{ ROOT }}"

    - name: start kafkaspider under daemonize on CPUs 1 and 2
      command:
        chdir: "{{ SPIDER_ROOT }}"
        # ansible won't start this cmd if this file exists
        # NOTE(review): daemonize is not given a pid-file option here, so
        # presumably the program itself writes this file - confirm.
        creates: "{{ SPIDER_ROOT }}/pid.kafkaspider"
        # daemonize changes to ROOT (not SPIDER_ROOT) before launching;
        # presumably so the relative kafkaspider.py resolves - confirm.
        cmd: >-
          /usr/local/bin/daemonize
          -E PASSWORD="{{ MONGO_RW_PASS }}"
          -c "{{ ROOT }}"
          -o log.out.kafkaspider
          -e log.err.kafkaspider
          /usr/bin/taskset -c 1,2
          scrapy runspider kafkaspider.py -L INFO
      # Cast first: a value passed as `-e SPIDER=1` is a string and would
      # never compare equal to the integer 1.
      when: SPIDER | int == 1
      # The rendered command line contains the MongoDB password.
      no_log: true
# Play: start the artefact upload job as user spider on pi8, only when
# SPIDER is 0 and DATA_ROOT is an existing directory.
- name: crawl backlog of downloaded artefacts and ingest into MongoDB (program will exit once done)
  hosts: pi8
  gather_facts: false
  become: true
  become_user: spider
  tasks:
    - name: check whether the artefact data root exists
      stat:
        path: "{{ DATA_ROOT }}"
      register: data_root

    - name: start etl_upload_artefacts.py under daemonize on CPU 1
      command:
        chdir: "{{ SPIDER_ROOT }}"
        # The command is skipped when this file already exists.
        creates: "{{ SPIDER_ROOT }}/pid.upload.artefacts"
        cmd: >-
          /usr/local/bin/daemonize
          -E PASSWORD="{{ MONGO_RW_PASS }}"
          -c "{{ SPIDER_ROOT }}"
          -o log.out.uploadartefacts
          -e log.err.uploadartefacts
          /usr/bin/taskset -c 1
          python3 "{{ ROOT }}/etl_upload_artefacts.py"
          --n 400000
          --root "{{ DATA_ROOT }}"
          --artefacts "{{ ARTEFACT_TOPIC }}"
          --group "js2mongo-{{ inventory_hostname_short }}"
          --start earliest
      # Cast first: `-e SPIDER=0` arrives as a string. The -1 default keeps
      # a non-numeric value from being read as 0 and starting the upload.
      # isdir is only evaluated after exists is true.
      when: SPIDER | int(-1) == 0 and data_root.stat.exists and data_root.stat.isdir
      # The rendered command line contains the MongoDB password.
      no_log: true
# Play: start the feature-vector job as user lowpriv on every host in
# thug_hosts (no SPIDER condition).
- name: compute feature vectors and ingest into MongoDB
  hosts: thug_hosts
  gather_facts: false
  become: true
  become_user: lowpriv
  tasks:
    - name: start etl_make_fv.py under daemonize on CPUs 2 and 3
      command:
        chdir: "{{ LOWPRIV_ROOT }}"
        # The command is skipped when this file already exists.
        creates: "{{ LOWPRIV_ROOT }}/pid.make.fv"
        cmd: >-
          /usr/local/bin/daemonize
          -E "PASSWORD={{ MONGO_RW_PASS }}"
          -c "{{ LOWPRIV_ROOT }}"
          -o log.out.makefv
          -e log.err.makefv
          /usr/bin/taskset -c 2,3
          python3 "{{ ROOT }}/etl_make_fv.py"
          --cache 10000
          --v
      # The rendered command line contains the MongoDB password.
      no_log: true
# Play: start an additional feature-vector job as user lowpriv on
# x86_thug_hosts, only when SPIDER is 2. No taskset wrapper is used here.
- name: run extra feature vector instance when backlogging (NO CPU affinity constraints)
  hosts: x86_thug_hosts
  gather_facts: false
  become: true
  become_user: lowpriv
  tasks:
    - name: start an unpinned etl_make_fv.py under daemonize
      command:
        chdir: "{{ LOWPRIV_ROOT }}"
        # The command is skipped when this file already exists.
        # NOTE(review): the thug_hosts feature-vector play uses this same
        # path and the same log file names; if a host is in both groups the
        # extra instance may be skipped - confirm.
        creates: "{{ LOWPRIV_ROOT }}/pid.make.fv"
        cmd: >-
          /usr/local/bin/daemonize
          -E "PASSWORD={{ MONGO_RW_PASS }}"
          -c "{{ LOWPRIV_ROOT }}"
          -o log.out.makefv
          -e log.err.makefv
          /usr/bin/python3 "{{ ROOT }}/etl_make_fv.py"
          --dbuser rw
          --cache 10000
          --v
      # Cast first: a value passed as `-e SPIDER=2` is a string and would
      # never compare equal to the integer 2.
      when: SPIDER | int == 2
      # The rendered command line contains the MongoDB password.
      no_log: true
# Play: start the control-evaluation job as user lowpriv on every host in
# thug_hosts (no SPIDER condition).
- name: process JS artefacts and assign best control (if any)
  hosts: thug_hosts
  become: true
  become_user: lowpriv
  tasks:
    - name: start etl_eval_js_against_controls.py under daemonize on CPU 3
      command:
        chdir: "{{ LOWPRIV_ROOT }}"
        # The command is skipped when this file already exists.
        creates: "{{ LOWPRIV_ROOT }}/pid.eval.controls"
        cmd: >-
          /usr/local/bin/daemonize
          -E "PASSWORD={{ MONGO_RW_PASS }}"
          -c "{{ LOWPRIV_ROOT }}"
          -o log.out.evaljs
          -e log.err.evaljs
          /usr/bin/taskset -c 3
          python3 "{{ ROOT }}/etl_eval_js_against_controls.py"
          --report-good
          --max-distance 300.0
      # The rendered command line contains the MongoDB password.
      no_log: true
# Play: start an additional control-evaluation job as user lowpriv on
# x86_thug_hosts, only when SPIDER is 3.
- name: extra instance if falling behind on control evaluation
  hosts: x86_thug_hosts
  become: true
  become_user: lowpriv
  tasks:
    - name: start an extra etl_eval_js_against_controls.py under daemonize on CPU 3
      command:
        chdir: "{{ LOWPRIV_ROOT }}"
        # The command is skipped when this file already exists.
        # NOTE(review): the thug_hosts control-evaluation play uses this same
        # path and the same log file names; if a host is in both groups the
        # extra instance may be skipped - confirm.
        creates: "{{ LOWPRIV_ROOT }}/pid.eval.controls"
        cmd: >-
          /usr/local/bin/daemonize
          -E "PASSWORD={{ MONGO_RW_PASS }}"
          -c "{{ LOWPRIV_ROOT }}"
          -o log.out.evaljs
          -e log.err.evaljs
          /usr/bin/taskset -c 3
          python3 "{{ ROOT }}/etl_eval_js_against_controls.py"
          --report-good
          --max-distance 300.0
      # Cast first: a value passed as `-e SPIDER=3` is a string and would
      # never compare equal to the integer 3.
      when: SPIDER | int == 3
      # The rendered command line contains the MongoDB password.
      no_log: true
# Play: start the snippet spider as user spider on x86_thug_hosts, only
# when SPIDER is 1.
- name: store javascript snippets embedded on HTML pages limited to only 1 CPU core
  hosts: x86_thug_hosts
  become: true
  become_user: spider
  tasks:
    - name: start snippet_spider.py under daemonize on CPU 2
      command:
        chdir: "{{ SPIDER_ROOT }}"
        # The command is skipped when this file already exists.
        creates: "{{ SPIDER_ROOT }}/pid.snippetspider"
        # The spider script is read from the connection user's home
        # directory, although the process runs as user spider.
        cmd: >-
          /usr/local/bin/daemonize
          -E "PASSWORD={{ MONGO_RW_PASS }}"
          -c "{{ SPIDER_ROOT }}"
          -o log.out.snippetspider
          -e log.err.snippetspider
          /usr/bin/taskset -c 2
          scrapy runspider "/home/{{ ansible_user }}/src/snippet_spider.py"
          -L INFO
      # Cast first: a value passed as `-e SPIDER=1` is a string and would
      # never compare equal to the integer 1.
      when: SPIDER | int == 1
      # The rendered command line contains the MongoDB password.
      no_log: true