-
Notifications
You must be signed in to change notification settings - Fork 0
/
Makefile
146 lines (123 loc) · 4.67 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# Every name below is a command-style target, not a file that a recipe creates.
# Declaring them phony keeps a same-named file or directory (for example a
# tests/ folder) from making the rule look "up to date" and silently skipping it.
# clean, data, lint and requirements have no rule in the visible Makefile; they
# are kept here so any existing use of those names is unaffected.
.PHONY: clean data lint requirements \
        activate_poetry install_poetry tests \
        eval_model train_model build_features make_dataset get_data \
        init_data_dir remove_data remove_all_data
#################################################################################
# GLOBALS #
#################################################################################
#################################################################################
# SETUP #
#################################################################################
# Brings install_poetry up to date first, then runs `poetry shell`.
# NOTE(review): in Poetry 2.x the `shell` command appears to live in a separate
# plugin rather than in core - confirm against the Poetry version in use.
## Activate poetry environment
activate_poetry: install_poetry
	poetry shell
# Runs `poetry install`, then `pre-commit install` through `poetry run`.
# Each recipe line runs in its own shell; make stops at the first one that fails.
## Install poetry environment
install_poetry:
	poetry install
	poetry run pre-commit install
# Invokes `pytest` with no arguments and no prerequisites.
# NOTE(review): pytest is called directly rather than via `poetry run`, so this
# presumably expects the environment to be active already - confirm.
## Run tests
tests:
	pytest
#################################################################################
# PROJECT RULES #
#################################################################################
# Runs the evaluation script on its own; it has no prerequisites, so nothing is
# retrained or rebuilt first. The "## " line below is what `make help` prints.
## Evaluate performance for latest model run
eval_model:
	$(info Evaluating model)
	python src/cgpos/eval/eval.py
# Trains, then evaluates. build_features is order-only: it is brought up to
# date before training, but never forces this rule on its own.
# Progress messages use echo rather than $(info ...): make expands every line
# of a recipe before it runs the first command, so $(info ...) lines placed
# between commands would all print up front, before training has even started.
## Train model and evaluate performance
train_model: | build_features
	@echo "Training model"
	python src/cgpos/eval/train.py
	@echo "Evaluating model"
	python src/cgpos/eval/eval.py
# Files the featurize step is expected to leave behind; build_features is just
# a convenient name for "both of these exist".
# All prerequisites are order-only, so an existing file is never rebuilt when
# upstream data changes - delete it (or use remove_data) to force a rebuild.
# NOTE(review): one rule header with several targets defines an independent
# rule per file, so under `make -j` the script may be started once per missing file.
feature_files := \
    data/reference/features_map.pkl \
    data/processed/features.pkl
## Build features
build_features: | $(feature_files)
$(feature_files): | make_dataset
	$(info Building features)
	python src/cgpos/data/featurize.py
# Files the processing step is expected to leave behind; make_dataset is a
# convenient name for "all of these exist".
# Prerequisites are order-only throughout: get_data is satisfied first, but an
# existing output is never considered stale because of it.
# NOTE(review): one rule header with several targets defines an independent
# rule per file, so under `make -j` the script may be started once per missing file.
dataset_files := \
    data/interim/perseus_raw.pkl \
    data/reference/targets_map.pkl \
    data/interim/perseus_normalized.pkl \
    data/processed/cleaned.pkl
## Make Perseus dataset
make_dataset: | $(dataset_files)
$(dataset_files): | get_data
	$(info Making dataset)
	python src/cgpos/data/process.py
# Aggregate target with no recipe of its own: it only requires (order-only)
# that the README.md of each downloaded archive is present under data/raw.
## Get raw data
get_data: | \
    data/raw/treebank_data-master/README.md \
    data/raw/Greek-Dependency-Trees-master/README.md
# Downloads the master-branch archive of PerseusDL/treebank_data into
# data/raw/zip and unpacks it under data/raw. The target is a README.md that is
# presumably part of the archive, so its presence marks "already fetched".
# -f makes curl exit non-zero on an HTTP error (e.g. 404) instead of saving the
# error page as perseus.zip and leaving unzip to fail with a confusing message.
data/raw/treebank_data-master/README.md: | data/raw/zip
	$(info Grabbing Perseus data)
	curl -fLo data/raw/zip/perseus.zip https://github.com/PerseusDL/treebank_data/archive/master.zip
	unzip data/raw/zip/perseus.zip -d data/raw
# Downloads the master-branch archive of vgorman1/Greek-Dependency-Trees into
# data/raw/zip and unpacks it under data/raw. The target is a README.md that is
# presumably part of the archive, so its presence marks "already fetched".
# -f makes curl exit non-zero on an HTTP error (e.g. 404) instead of saving the
# error page as gorman.zip and leaving unzip to fail with a confusing message.
data/raw/Greek-Dependency-Trees-master/README.md: | data/raw/zip
	$(info Grabbing Greek Dependency Trees data)
	curl -fLo data/raw/zip/gorman.zip https://github.com/vgorman1/Greek-Dependency-Trees/archive/master.zip
	unzip data/raw/zip/gorman.zip -d data/raw
# Scratch directory that holds the downloaded archives. init_data_dir is
# order-only, so data/raw exists before this runs.
# -p matches the other directory rule in this file and keeps a forced rebuild
# (`make -B`) from failing because the directory is already there.
data/raw/zip: | init_data_dir
	mkdir -p $@
# The data directories this Makefile works in. Each one is created on demand by
# the shared rule at the bottom; init_data_dir itself only prints a message
# once they are all present (order-only, so directory timestamps are ignored).
data_dirs := data/raw data/processed data/interim data/reference
## Initialize data directory
init_data_dir: | $(data_dirs)
	$(info Initializing data directory)
$(data_dirs):
	mkdir -p $@
# Empties the derived-data directories and leaves data/raw untouched.
# init_data_dir is a normal prerequisite, so it is brought up to date before the
# rm runs. The glob `*` is expanded by the shell, so dot-files are not matched.
## Remove processed data
remove_data: init_data_dir
	$(info Removing project data (excluding data/raw))
	rm -rf $(addsuffix /*,data/processed data/interim data/reference)
# Empties every data directory, including the downloads under data/raw.
# init_data_dir is a normal prerequisite, so it is brought up to date before the
# rm runs. The glob `*` is expanded by the shell, so dot-files are not matched.
## Remove all data
remove_all_data: init_data_dir
	$(info Removing all data)
	rm -rf $(addsuffix /*,data/raw data/processed data/interim data/reference)
#################################################################################
# Self Documenting Commands #
#################################################################################
.DEFAULT_GOAL := help
# Inspired by <http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html>
# sed script explained:
# /^##/:
# * save line in hold space
# * purge line
# * Loop:
# * append newline + line to hold space
# * go to next line
# * if line starts with doc comment, strip comment character off and loop
# * remove target prerequisites
# * append hold space (+ newline) to line
# * replace newline plus comments by `---`
# * print line
# Separate expressions are necessary because labels cannot be delimited by
# semicolon; see <http://stackoverflow.com/a/11799865/1968>
# Prints every documented rule of this Makefile as an aligned, word-wrapped list.
# Pipeline:
#   sed  - pairs each "## " comment with the rule name on the line right after
#          it and emits "name---description" (so the "## " line must sit
#          directly above its target, with nothing in between)
#   sort - orders the entries case-insensitively
#   awk  - prints the name in colour, padded to `indent` columns, and wraps the
#          description words so a line does not exceed `ncol` columns
#   more - pages the result; on Darwin it gets extra flags so the colour
#          escape sequences are passed through
# The width falls back to 80 columns when tput cannot report one (for example
# when TERM is unset); otherwise ncol would be empty and every word would wrap.
.PHONY: help
help:
	@echo "$$(tput bold)Available rules:$$(tput sgr0)"
	@echo
	@sed -n -e "/^## / { \
		h; \
		s/.*//; \
		:doc" \
		-e "H; \
		n; \
		s/^## //; \
		t doc" \
		-e "s/:.*//; \
		G; \
		s/\\n## /---/; \
		s/\\n/ /g; \
		p; \
	}" ${MAKEFILE_LIST} \
	| LC_ALL='C' sort --ignore-case \
	| awk -F '---' \
		-v ncol=$$(tput cols 2>/dev/null || echo 80) \
		-v indent=19 \
		-v col_on="$$(tput setaf 6)" \
		-v col_off="$$(tput sgr0)" \
	'{ \
		printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
		n = split($$2, words, " "); \
		line_length = ncol - indent; \
		for (i = 1; i <= n; i++) { \
			line_length -= length(words[i]) + 1; \
			if (line_length <= 0) { \
				line_length = ncol - indent - length(words[i]) - 1; \
				printf "\n%*s ", -indent, " "; \
			} \
			printf "%s ", words[i]; \
		} \
		printf "\n"; \
	}' \
	| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')