diff --git a/Pipfile.lock b/Pipfile.lock
index c85b8aa7f..95af74202 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -114,16 +114,12 @@
"index": "pypi",
"version": "==2021.7.26"
},
- "dawg": {
+ "dawg2": {
"hashes": [
- "sha256:34881e06278d4a54cf0b402c0c8b587bef0caa78f0eee595adc7a2aa530e48ce",
- "sha256:73760ad1272b1b47997f1a768b8f3bf547c92475bcd62185f4ab7e1bc691964e",
- "sha256:7aecc4c89243edaf1efe7a4d769d993a7cd9307a8a04f48e07c4fc7c44bdd38f",
- "sha256:83ce4a73f7632b0ed31af16c2750533ecbed347bad1148a52f6436e348b5b7ac",
- "sha256:fb90b799fb7d6d728531840529c812a9ee17736da71e8a596ede8bfd6c62bf36"
+ "sha256:d8cbf0ddc15882b723848bf7d849a6d3186a15945738b3cf4bf8cfc810cda2db"
],
"index": "pypi",
- "version": "==0.8.0"
+ "version": "==0.13.0"
},
"dj-database-url": {
"hashes": [
@@ -403,10 +399,10 @@
},
"uwsgi": {
"hashes": [
- "sha256:88ab9867d8973d8ae84719cf233b7dafc54326fcaec89683c3f9f77c002cdff9"
+ "sha256:d653d2d804c194c8cbe2585fa56efa2650313ae75c686a9d7931374d4dfbfc6e"
],
"index": "pypi",
- "version": "==2.0.20"
+ "version": "==2.0.25.1"
},
"whitenoise": {
"hashes": [
@@ -633,12 +629,13 @@
},
"codecov": {
"hashes": [
- "sha256:585dc217dc3d8185198ceb402f85d5cb5dbfa0c5f350a5abcdf9e347776a5b47",
- "sha256:782a8e5352f22593cbc5427a35320b99490eb24d9dcfa2155fd99d2b75cfb635",
- "sha256:a0da46bb5025426da895af90938def8ee12d37fcbcbbbc15b6dc64cf7ebc51c1"
+ "sha256:2362b685633caeaf45b9951a9b76ce359cd3581dd515b430c6c3f5dfb4d92a8c",
+ "sha256:7d2b16c1153d01579a89a94ff14f9dbeb63634ee79e18c11036f34e7de66cbc9",
+ "sha256:c2ca5e51bba9ebb43644c43d0690148a55086f7f5e6fd36170858fa4206744d5"
],
"index": "pypi",
- "version": "==2.1.12"
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+ "version": "==2.1.13"
},
"coverage": {
"hashes": [
@@ -1065,13 +1062,6 @@
"index": "pypi",
"version": "==0.16.1"
},
- "nb-black": {
- "hashes": [
- "sha256:1ca52e3a46675f6a0a6d79ac73a1f8f951bef60f919eced56173e76ab1b6d62b"
- ],
- "index": "pypi",
- "version": "==1.0.7"
- },
"nbclassic": {
"hashes": [
"sha256:4b01076effdac53e775cd1b6a4e891663568b32621468e205b502a23b2921899",
diff --git a/Procfile b/Procfile
index 06ae0c0dc..b338bb538 100644
--- a/Procfile
+++ b/Procfile
@@ -14,3 +14,5 @@ cwdeng: ./cwdeng-manage runserver
srseng: ./srseng-manage runserver
hdneng: ./hdneng-manage runserver
lacombe: ./crkLacombeeng-manage runserver
+blaeng: ./blaeng-manage runserver
+
diff --git a/blaeng-manage b/blaeng-manage
new file mode 100755
index 000000000..c81d60414
--- /dev/null
+++ b/blaeng-manage
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+"""
+Command-line utility for administrative tasks.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+# sys.path[0] is initialized to the directory containing this script; replace
+# it with that directory's src/ subdirectory so the blaeng package is importable.
+sys.path[0] = os.fspath(Path(sys.path[0]) / "src")
+
+if __name__ == "__main__":
+ os.environ.setdefault("DJANGO_SETTINGS_MODULE", "blaeng.site.settings")
+
+ from django.core.management import execute_from_command_line
+
+ execute_from_command_line(sys.argv)
diff --git a/docs/developers-guide.md b/docs/developers-guide.md
index 04fa6c8a6..9b6e70ed2 100644
--- a/docs/developers-guide.md
+++ b/docs/developers-guide.md
@@ -221,6 +221,7 @@ Then you can access the dictionary applications at various port numbers:
- arpeng:
- cwdeng:
- srseng:
+ - blaeng:
Because [cookies are not port-specific for historical insecurity
reasons](https://stackoverflow.com/questions/1612177/are-http-cookies-port-specific),
@@ -232,6 +233,7 @@ adding the following to `/etc/hosts`:
127.0.0.1 cwdeng-local
127.0.0.1 crkeng-local
127.0.0.1 srseng-local
+ 127.0.0.1 blaeng-local
Then you can access the sites with cookie isolation at
, ,
diff --git a/scripts/dev-bootstrap b/scripts/dev-bootstrap
index a1d41dbef..495cc4199 100755
--- a/scripts/dev-bootstrap
+++ b/scripts/dev-bootstrap
@@ -20,6 +20,7 @@ for LANG_PAIR in \
crkeng \
cwdeng \
srseng \
+ blaeng \
; do
for USE_TEST_DB in true false; do
export USE_TEST_DB
diff --git a/src/CreeDictionary/API/search/presentation.py b/src/CreeDictionary/API/search/presentation.py
index ead4d238b..55d576022 100644
--- a/src/CreeDictionary/API/search/presentation.py
+++ b/src/CreeDictionary/API/search/presentation.py
@@ -56,9 +56,9 @@ class _InitialChangeResult(AbstractResult):
@dataclass
class _LexicalEntry:
entry: List[_ReduplicationResult | SerializedWordform | _InitialChangeResult]
- text: str
+ text: Optional[str]
url: str
- id: str
+ id: str | int | None
type: LexicalEntryType
original_tag: FSTTag
@@ -173,9 +173,10 @@ def __init__(
show_emoji=self._show_emoji,
)
- self.preverbs = [
- lexical_entry["entry"]
+ self.preverbs: List[SerializedWordform] = [
+ cast(SerializedWordform, entry)
for lexical_entry in self.lexical_info
+ for entry in lexical_entry["entry"]
if lexical_entry["type"] == "Preverb"
]
self.reduplication = [
@@ -453,22 +454,20 @@ def get_lexical_info(
animate_emoji: str,
show_emoji: str,
dict_source: list,
-) -> List:
+) -> List[dict]:
if not result_analysis:
return []
result_analysis_tags = result_analysis.prefix_tags
first_letters = extract_first_letters(result_analysis)
- lexical_info: List = []
+ lexical_info: List[_LexicalEntry] = []
for i, tag in enumerate(result_analysis_tags):
preverb_result: Optional[Preverb] = None
reduplication_string: Optional[str] = None
_type: Optional[LexicalEntryType] = None
- entry: Optional[
- _ReduplicationResult | SerializedWordform | _InitialChangeResult
- ] = None
+ entry = None
if tag in ["RdplW+", "RdplS+"]:
reduplication_string = generate_reduplication_string(
@@ -501,16 +500,16 @@ def get_lexical_info(
entries.append(entry)
url = "search?q=" + preverb_text
_type = "Preverb"
- id = entries[0]["id"]
+ id: Optional[int] = entries[0]["id"]
result = _LexicalEntry(
- entry=entries,
+ entry=cast(Any, entries),
text=preverb_text,
url=url,
id=id,
type=_type,
original_tag=tag,
)
- lexical_info.append(serialize_lexical_entry(result))
+ lexical_info.append(result)
else:
# Can't find a match for the preverb in the database.
# This happens when searching against the test database for
@@ -548,8 +547,8 @@ def get_lexical_info(
type=_type,
original_tag=tag,
)
- lexical_info.append(serialize_lexical_entry(result))
- return lexical_info
+ lexical_info.append(result)
+ return [serialize_lexical_entry(entry) for entry in lexical_info]
def extract_first_letters(analysis: RichAnalysis) -> List[str]:
diff --git a/src/CreeDictionary/search_quality/analyze_results.py b/src/CreeDictionary/search_quality/analyze_results.py
index d6dc30f8f..26ea4a678 100644
--- a/src/CreeDictionary/search_quality/analyze_results.py
+++ b/src/CreeDictionary/search_quality/analyze_results.py
@@ -130,7 +130,7 @@ def load_results_file(results_file: PathLike) -> SampleSearchResultsJson:
return search_results
-def analyze(results_file, sample_definition: SampleDefinition = None):
+def analyze(results_file, sample_definition: "SampleDefinition | None" = None):
"""
If sample_definition is None, the default will be used.
diff --git a/src/CreeDictionary/tests/API_tests/model_test.py b/src/CreeDictionary/tests/API_tests/model_test.py
index 56ba91f7d..feed3f0c5 100644
--- a/src/CreeDictionary/tests/API_tests/model_test.py
+++ b/src/CreeDictionary/tests/API_tests/model_test.py
@@ -227,7 +227,7 @@ def test_search_words_with_reduplication():
search_result = results.pop()
assert len(search_result.lexical_info) == 1
- assert search_result.lexical_info[0]["entry"]["text"] == "na-"
+ assert search_result.lexical_info[0]["entry"][0]["text"] == "na-"
assert search_result.lexical_info[0]["type"] == "Reduplication"
@@ -241,7 +241,7 @@ def test_search_words_with_inital_change():
search_result = results.pop()
assert len(search_result.lexical_info) == 1
- assert search_result.lexical_info[0]["entry"]["text"] == " "
+ assert search_result.lexical_info[0]["entry"][0]["text"] == " "
assert search_result.lexical_info[0]["type"] == "Initial Change"
diff --git a/src/blaeng/README.md b/src/blaeng/README.md
new file mode 100644
index 000000000..6d8d6ac51
--- /dev/null
+++ b/src/blaeng/README.md
@@ -0,0 +1,64 @@
+# Generating a new site from scratch
+
+These are all the instructions I followed to create this new version.
+
+```
+./crkeng-manage newdictsite --port 8011 -v 2 bla eng
+# Add blaeng to morphodict/src/conftest.py
+# Add MORPHODICT_LANGUAGE_ENDONYM to src/blaeng/site/settings.py
+# Add many other details, copying from Woods Cree settings.py, including:
+# MORPHODICT_SOURCE_LANGUAGE_NAME MORPHODICT_SOURCE_LANGUAGE_SHORT_NAME MORPHODICT_ORTHOGRAPHY MORPHODICT_DICTIONARY_NAME
+./blaeng-manage migrate
+./blaeng-manage ensurecypressadminuser --superuser
+./blaeng-manage ensuretestdb
+# MISSING TRANSDUCERS
+
+```
+
+The transducers (FSTs) were missing, so I had to build them, following the instructions we had just collected (reproduced below).
+
+First, clone the `giellalt/lang-bla` repository and generate the FSTs with the
+default infrastructure. Then start an interactive session with:
+```
+hfst-xfst
+```
+Inside that session, run:
+```
+read lexc src/fst/morphology/lexicon.lexc
+define Morphology
+source src/fst/bla-phonology.xfscript
+define Phonology
+regex ~[ $[ "+Err/Frag" ]];
+define removeFragments
+
+regex ~[ $[ "+Err/Orth" ]];
+define removeNonStandardForms
+regex $[ "+N" | "+V" | "+Ipc" | "+Pron" ];
+define selectDictPOS
+set flag-is-epsilon ON
+regex [ selectDictPOS .o. removeNonStandardForms .o. removeFragments .o. Morphology .o. Phonology ];
+save stack generator-gt-dict-norm.hfst
+define NormativeGenerator
+regex [ [ "<" | ">" | "/" ] -> 0 ];
+define removeBoundaries
+load src/fst/orthography/spellrelax.compose.hfst
+define SpellRelax
+regex [ selectDictPOS .o. removeFragments .o. Morphology .o. Phonology .o. removeBoundaries .o. SpellRelax ];
+# regex [ NormativeGenerator .o. removeBoundaries .o. SpellRelax ];
+invert net
+save stack analyser-gt-dict-desc.hfst
+define DescriptiveAnalyser
+```
+
+
+And then we create the `hfstol` files with:
+
+```
+hfst-fst2fst -O -i INPUT.hfst -o OUTPUT.hfstol
+```
+
+After this, `./blaeng-manage ensuretestdb` works.
+
+```
+./blaeng-manage importjsondict src/blaeng/resources/dictionary/blaeng_test_db.importjson
+```
diff --git a/src/blaeng/__init__.py b/src/blaeng/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/blaeng/app/__init__.py b/src/blaeng/app/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/blaeng/db/.keep b/src/blaeng/db/.keep
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/blaeng/resources/fst/analyser-gt-dict-desc.hfstol b/src/blaeng/resources/fst/analyser-gt-dict-desc.hfstol
index 6ffae79f9..fbff3ad7d 100644
--- a/src/blaeng/resources/fst/analyser-gt-dict-desc.hfstol
+++ b/src/blaeng/resources/fst/analyser-gt-dict-desc.hfstol
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:b8f05c6a923d890459206ed83da8e04ada2f188f4f8d0db1c71bf79ee097f00c
-size 1509739
+oid sha256:e4b2051d66b1d53881c3d250476df856f9c5bdebf0af41d5bbe41d2d4411dd6f
+size 288
diff --git a/src/blaeng/resources/fst/generator-gt-dict-norm.hfstol b/src/blaeng/resources/fst/generator-gt-dict-norm.hfstol
index 393122f9d..29f39890f 100644
--- a/src/blaeng/resources/fst/generator-gt-dict-norm.hfstol
+++ b/src/blaeng/resources/fst/generator-gt-dict-norm.hfstol
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:33e0fd3b96d81d3db27508cd3d59cdeee5458dd202c2c077406a17f29eeaf0ee
-size 1089694
+oid sha256:64d9dfe14d6a0df638d86a1fa5ed5fda6da8613e9ed2473f1210962759da44f6
+size 288
diff --git a/src/blaeng/site/__init__.py b/src/blaeng/site/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/blaeng/site/settings.py b/src/blaeng/site/settings.py
new file mode 100644
index 000000000..c980c2e66
--- /dev/null
+++ b/src/blaeng/site/settings.py
@@ -0,0 +1,58 @@
+"""
+Django settings for blaeng.
+
+For more information on this file, see
+https://docs.djangoproject.com/en/3.2/topics/settings/
+
+For the full list of settings and their values, see
+https://docs.djangoproject.com/en/3.2/ref/settings/
+"""
+
+from pathlib import Path
+
+from morphodict.site import base_dir_setup
+
+BASE_DIR = Path(__file__).resolve().parent.parent
+
+base_dir_setup.set_base_dir(BASE_DIR)
+
+from morphodict.site.settings import *
+
+# Where this application should be deployed:
+PRODUCTION_HOST = "blaeng.altlab.dev"
+
+ALLOWED_HOSTS.append(PRODUCTION_HOST)
+
+DEFAULT_RUNSERVER_PORT = 8011
+
+INSTALLED_APPS.insert(0, "blaeng.app")
+
+# Morphodict configuration
+
+# Used in the lang="" attributes in HTML. Blackfoot has no two-letter ISO 639-1 code, so this is its three-letter ISO 639-3 code.
+MORPHODICT_ISO_639_1_CODE = "bla"
+
+MORPHODICT_SOURCE_LANGUAGE = "bla"
+MORPHODICT_TARGET_LANGUAGE = "eng"
+
+MORPHODICT_SOURCE_LANGUAGE_NAME = "Blackfoot"
+MORPHODICT_SOURCE_LANGUAGE_SHORT_NAME = "Blackfoot"
+
+MORPHODICT_LANGUAGE_ENDONYM = "ᓱᖽᐧᖿ"
+
+MORPHODICT_DICTIONARY_NAME = "sínaakia’tsis"
+
+RELAXED_ANALYZER_FST_FILENAME = "analyser-gt-dict-desc.hfstol"
+STRICT_ANALYZER_FST_FILENAME = RELAXED_ANALYZER_FST_FILENAME
+STRICT_GENERATOR_FST_FILENAME = "generator-gt-dict-norm.hfstol"
+
+MORPHODICT_ORTHOGRAPHY = {
+ "default": "Latn",
+ "available": {
+ "Latn": {"name": "Latin"},
+ "Cans": {
+ "name": "Syllabics",
+ "converter": "CreeDictionary.CreeDictionary.orthography.to_syllabics",
+ },
+ },
+}
diff --git a/src/conftest.py b/src/conftest.py
index 0a8e9069e..fc325ae72 100644
--- a/src/conftest.py
+++ b/src/conftest.py
@@ -9,4 +9,5 @@
"hdneng/site/settings.py",
"srseng/site/settings.py",
"srseng/site/settings_mobile.py",
+ "blaeng/site/settings.py",
]