forked from mozilla/translations
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Taskfile.yml
308 lines (269 loc) · 9.58 KB
/
Taskfile.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
# Task runner configuration (https://taskfile.dev).
# Run `task --list` to see the available tasks.
version: '3'

tasks:
  # Matches any `poetry-install-<group>` dependency; installs just that
  # dependency group so tasks don't pull in the whole environment.
  poetry-install-*:
    internal: true
    desc: Install only the group needed for the dependencies of a task.
    vars:
      GROUP: '{{index .MATCH 0}}'
    cmds:
      - poetry install --only {{.GROUP}} --no-root

  clean-venvs:
    desc: Remove the virtual envs created by the test runner.
    cmds:
      - rm -rf data/task-venvs/*

  download-logs:
    desc: Download the logs for taskcluster. Requires --task-group-id
    summary: |
      The logs will be saved to: ./data/taskcluster-logs
      Example:
      task download-logs -- --task-group-id GU9ZyWFhRDe_nxlAHcen8g
    deps: [poetry-install-utils]
    cmds:
      - >-
        poetry run python -W ignore utils/taskcluster_downloader.py
        --mode=logs {{.CLI_ARGS}}

  download-evals:
    desc: Downloads evaluation results from Taskcluster
    summary: |
      The evals will be saved to: ./data/taskcluster-evals
      Example: `task download-evals -- --task-group-id GU9ZyWFhRDe_nxlAHcen8g`
    deps: [poetry-install-utils]
    cmds:
      - >-
        poetry run python -W ignore utils/taskcluster_downloader.py
        --mode=evals {{.CLI_ARGS}}

  download-models:
    desc: Downloads models from Taskcluster
    summary: |
      The models will be saved to: ./data/taskcluster-model
      Example: `task download-models -- --task-group-id GU9ZyWFhRDe_nxlAHcen8g`
    deps: [poetry-install-utils]
    cmds:
      - >-
        poetry run python -W ignore utils/taskcluster_downloader.py
        --mode=model {{.CLI_ARGS}}

  config-generator:
    desc: Create a training config for a language pair
    summary: |
      The models will be saved to: ./data/taskcluster-model
      Example: `task config-generator -- en fi`
    deps: [poetry-install-utils]
    cmds:
      - >-
        PYTHONPATH=$(pwd) poetry run python -W ignore utils/config_generator.py {{.CLI_ARGS}}

  build-mono-nllb:
    desc: Build a monolingual NLLB datasets.
    summary: |
      The dataset will be saved to: ./data/nllb/nllb-mono-{lang}.txt.gz
      Example: `task build-mono-nllb -- sl`
    deps: [poetry-install-utils]
    cmds:
      - >-
        PYTHONPATH=$(pwd) poetry run python -W ignore utils/build-mono-nllb.py {{.CLI_ARGS}}

  opuscleaner:
    desc: Run the opuscleaner tool.
    deps: [poetry-install-opuscleaner]
    cmds:
      - poetry run opuscleaner-server serve --host=0.0.0.0 --port=8000

  # The inference-* tasks all delegate into the docker image via `docker-run`.
  inference-clean:
    desc: Clean build artifacts from the inference directory.
    cmds:
      - >-
        task docker-run -- ./inference/scripts/clean.sh

  inference-build:
    desc: Build inference engine.
    cmds:
      - >-
        task docker-run -- ./inference/scripts/build-local.sh

  inference-test:
    desc: Run inference tests.
    cmds:
      - >-
        task docker-run -- ./inference/scripts/unit-tests.sh

  inference-build-wasm:
    desc: Build inference engine WASM.
    cmds:
      - >-
        task docker-run -- ./inference/scripts/build-wasm.sh

  lint-black:
    desc: Checks the styling of the Python code with Black.
    deps: [poetry-install-black]
    cmds:
      - ./utils/tasks/black-check.sh

  lint-black-fix:
    desc: Fixes the styling of the Python code with Black.
    deps: [poetry-install-black]
    cmds:
      - poetry run black . {{.CLI_ARGS}}

  lint-ruff:
    desc: Lints the Python code with the ruff linter.
    deps: [poetry-install-lint]
    cmds:
      - poetry run ruff --version
      - poetry run ruff check . {{.CLI_ARGS}}

  lint-ruff-fix:
    desc: Fixes Python code lint errors with the ruff linter.
    deps: [poetry-install-lint]
    cmds:
      - poetry run ruff --version
      - poetry run ruff check . --fix {{.CLI_ARGS}}

  lint-fix:
    desc: Fix all automatically fixable errors. This is useful to run before pushing.
    cmds:
      - task: lint-black-fix
      - task: lint-ruff-fix

  lint:
    desc: Run all available linting tools.
    cmds:
      - task: lint-black
      - task: lint-ruff

  test:
    desc: Run python pytests in the current host.
    summary: |
      Some tests only pass in Docker. You can run this command outside of docker for
      some of the tests, or after running `task docker` to run them in the docker image.
      Without any arguments, it runs all of the tests searching the paths specified in
      testpaths in pyproject.toml.
      You can also specify a specific test to run:
      task test -- tests/test_alignments.py
    cmds:
      - poetry install --only tests --only utils --no-root
      - PYTHONPATH="$(pwd):$(pwd)/taskcluster/scripts/pipeline" poetry run pytest -vv {{.CLI_ARGS}}

  test-fast:
    desc: Re-run tests in a faster configuration.
    summary: |
      This command skips taskgraph generation and skips the poetry install in order to
      re-run tests quickly. If the taskgraph or dependencies are out of date, then tests
      may incorrectly fail. It also outputs the captured stdout.
      task test-fast -- tests/test_alignments.py
    cmds:
      - >-
        SKIP_TASKGRAPH=1 PYTHONPATH="$(pwd):$(pwd)/taskcluster/scripts/pipeline"
        poetry run pytest -vv -s {{.CLI_ARGS}}

  test-docker:
    desc: Run the unit tests in the docker image. Some tests require the pre-built Linux executables.
    cmds:
      - task docker-run -- task test

  train:
    desc: Start a training run
    summary: Open up the train task from the CLI based on your current branch.
    deps: [poetry-install-utils, poetry-install-taskcluster]
    cmds:
      - >-
        poetry run python -W ignore utils/train.py {{.CLI_ARGS}}

  docker:
    desc: Interactively run the local docker test image.
    deps: [docker-build]
    summary: |
      The local docker image includes the Linux x86 image, and pre-built binaries
      that are used in training.
    cmds:
      - utils/tasks/docker-run.sh bash

  docker-run:
    desc: Run a command in the local docker instance. e.g. `docker-run -- echo "hello"`
    deps: [docker-build]
    summary: |
      The local docker image includes the Linux x86 image, and pre-built binaries
      that are used in training.
    cmds:
      - utils/tasks/docker-run.sh {{.CLI_ARGS}}

  docker-build:
    desc: Build the local docker image that includes the proper Linux binaries for training
    cmds:
      - ./utils/tasks/docker-build.sh

  taskgraph-requirements:
    desc: Installs the taskgraph requirements.
    internal: true
    cmds:
      - poetry run --directory ./taskgraph -- pip3 install -r taskcluster/requirements.txt

  taskgraph-validate:
    desc: Validates Taskcluster task graph locally
    deps: [taskgraph-requirements]
    cmds:
      - >-
        TASKCLUSTER_ROOT_URL=""
        poetry run --directory ./taskgraph --
        taskgraph full

  taskgraph-diff:
    desc: Validates Taskcluster task graph locally
    summary: |
      Generates diffs of the full taskgraph against BASE_REV. Any parameters that were
      different between the current code and BASE_REV will have their diffs logged
      to OUTPUT_FILE.
    deps: [taskgraph-requirements]
    vars:
      # Both vars are overridable from the CLI, e.g. `task taskgraph-diff BASE_REV=my-branch`.
      OUTPUT_FILE: '{{.OUTPUT_FILE | default "./data/taskgraph.diff"}}'
      BASE_REV: '{{.BASE_REV | default "main"}}'
    cmds:
      - >-
        TASKCLUSTER_ROOT_URL=""
        poetry run --directory ./taskgraph --
        taskgraph full --json
        --parameters "taskcluster/test/params"
        --output-file "{{.OUTPUT_FILE}}"
        --diff "{{.BASE_REV}}"

  taskgraph-test:
    desc: Run tests and validations against task generation
    cmds:
      - >-
        poetry run --directory taskgraph --
        pytest taskcluster/test

  docs:
    desc: Run the GitHub pages Jekyll theme locally.
    cmds:
      - ./utils/tasks/serve-docs.sh

  preflight-check:
    desc: Perform pre-flight checks for a training run.
    deps: [poetry-install-utils]
    cmds:
      - poetry run python -W ignore utils/preflight_check.py {{.CLI_ARGS}}

  tensorboard:
    desc: Visualize training logs from task `download-logs` at http://localhost:6006
    summary: |
      Runs Tensorboard for Marian training logs in the ./data/taskcluster-logs directory.
      The logs are converted to tensorboard in the ./data/tensorboard-logs directory.
    deps: [poetry-install-tensorboard]
    cmds:
      - mkdir -p data/tensorboard-logs
      - >-
        poetry run marian-tensorboard
        --offline
        --log-file data/taskcluster-logs/**/*.log
        --work-dir data/tensorboard-logs

  find-corpus:
    desc: Finds all datasets for a language pair
    deps: [poetry-install-utils]
    cmds:
      - poetry run python -W ignore utils/find_corpus.py {{.CLI_ARGS}}

  run-model:
    desc: Run a Marian server that loads a model from data/models/$MODEL_TASK
    deps: [poetry-install-utils]
    cmds:
      - >-
        PYTHONPATH=$(pwd) poetry run python -W ignore utils/run_model.py {{.CLI_ARGS}}

  update-requirements:
    desc: Update the requirements.txt file for a pipeline script.
    summary: |
      Example usage:
      task update-requirements -- pipeline/eval/requirements/eval.in
    cmds:
      # Make sure a file was given to update.
      - >-
        if [[ -z "{{.CLI_ARGS}}" ]]; then
          echo "Provide a path to the .in file";
          echo "For example:"
          echo "task update-requirements -- pipeline/eval/requirements/eval.in";
          exit 1
        fi
      # Make sure the command is being run for docker
      - >-
        if [[ -z "$IS_DOCKER" ]]; then
          task docker-run -- task update-requirements -- {{.CLI_ARGS}} && exit
        fi
      # Make sure pip-tools are available in docker.
      - >-
        if ! command -v pip-compile &> /dev/null; then
          pip install pip-tools
        fi
      # Finally generate the hashes.
      - pip-compile --generate-hashes {{.CLI_ARGS}} --allow-unsafe