diff --git a/Pipfile.lock b/Pipfile.lock index c85b8aa7f..95af74202 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -114,16 +114,12 @@ "index": "pypi", "version": "==2021.7.26" }, - "dawg": { + "dawg2": { "hashes": [ - "sha256:34881e06278d4a54cf0b402c0c8b587bef0caa78f0eee595adc7a2aa530e48ce", - "sha256:73760ad1272b1b47997f1a768b8f3bf547c92475bcd62185f4ab7e1bc691964e", - "sha256:7aecc4c89243edaf1efe7a4d769d993a7cd9307a8a04f48e07c4fc7c44bdd38f", - "sha256:83ce4a73f7632b0ed31af16c2750533ecbed347bad1148a52f6436e348b5b7ac", - "sha256:fb90b799fb7d6d728531840529c812a9ee17736da71e8a596ede8bfd6c62bf36" + "sha256:d8cbf0ddc15882b723848bf7d849a6d3186a15945738b3cf4bf8cfc810cda2db" ], "index": "pypi", - "version": "==0.8.0" + "version": "==0.13.0" }, "dj-database-url": { "hashes": [ @@ -403,10 +399,10 @@ }, "uwsgi": { "hashes": [ - "sha256:88ab9867d8973d8ae84719cf233b7dafc54326fcaec89683c3f9f77c002cdff9" + "sha256:d653d2d804c194c8cbe2585fa56efa2650313ae75c686a9d7931374d4dfbfc6e" ], "index": "pypi", - "version": "==2.0.20" + "version": "==2.0.25.1" }, "whitenoise": { "hashes": [ @@ -633,12 +629,13 @@ }, "codecov": { "hashes": [ - "sha256:585dc217dc3d8185198ceb402f85d5cb5dbfa0c5f350a5abcdf9e347776a5b47", - "sha256:782a8e5352f22593cbc5427a35320b99490eb24d9dcfa2155fd99d2b75cfb635", - "sha256:a0da46bb5025426da895af90938def8ee12d37fcbcbbbc15b6dc64cf7ebc51c1" + "sha256:2362b685633caeaf45b9951a9b76ce359cd3581dd515b430c6c3f5dfb4d92a8c", + "sha256:7d2b16c1153d01579a89a94ff14f9dbeb63634ee79e18c11036f34e7de66cbc9", + "sha256:c2ca5e51bba9ebb43644c43d0690148a55086f7f5e6fd36170858fa4206744d5" ], "index": "pypi", - "version": "==2.1.12" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.1.13" }, "coverage": { "hashes": [ @@ -1065,13 +1062,6 @@ "index": "pypi", "version": "==0.16.1" }, - "nb-black": { - "hashes": [ - "sha256:1ca52e3a46675f6a0a6d79ac73a1f8f951bef60f919eced56173e76ab1b6d62b" - ], - "index": "pypi", - "version": "==1.0.7" - }, "nbclassic": { "hashes": [ "sha256:4b01076effdac53e775cd1b6a4e891663568b32621468e205b502a23b2921899", diff --git a/Procfile b/Procfile index 06ae0c0dc..b338bb538 100644 --- a/Procfile +++ b/Procfile @@ -14,3 +14,5 @@ cwdeng: ./cwdeng-manage runserver srseng: ./srseng-manage runserver hdneng: ./hdneng-manage runserver lacombe: ./crkLacombeeng-manage runserver +blaeng: ./blaeng-manage runserver + diff --git a/blaeng-manage b/blaeng-manage new file mode 100755 index 000000000..c81d60414 --- /dev/null +++ b/blaeng-manage @@ -0,0 +1,19 @@ +#!/usr/bin/env python +""" +Command-line utility for administrative tasks. +""" + +import os +import sys +from pathlib import Path + +# sys.path[0] is initialized to the directory containing the script, which +# isn’t right for our purposes. +sys.path[0] = os.fspath(Path(sys.path[0]) / "src") + +if __name__ == "__main__": + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "blaeng.site.settings") + + from django.core.management import execute_from_command_line + + execute_from_command_line(sys.argv) diff --git a/docs/developers-guide.md b/docs/developers-guide.md index 04fa6c8a6..9b6e70ed2 100644 --- a/docs/developers-guide.md +++ b/docs/developers-guide.md @@ -221,6 +221,7 @@ Then you can access the dictionary applications at various port numbers: - arpeng: - cwdeng: - srseng: + - blaeng: Because [cookies are not port-specific for historical insecurity reasons](https://stackoverflow.com/questions/1612177/are-http-cookies-port-specific), @@ -232,6 +233,7 @@ adding the following to `/etc/hosts`: 127.0.0.1 cwdeng-local 127.0.0.1 crkeng-local 127.0.0.1 srseng-local + 127.0.0.1 blaeng-local Then you can access the sites with cookie isolation at , , diff --git a/scripts/dev-bootstrap b/scripts/dev-bootstrap index a1d41dbef..495cc4199 100755 --- a/scripts/dev-bootstrap +++ b/scripts/dev-bootstrap @@ -20,6 +20,7 @@ for LANG_PAIR in \ crkeng \ cwdeng \ srseng \ + blaeng \ ; do for USE_TEST_DB in true false; do export USE_TEST_DB diff --git a/src/CreeDictionary/API/search/presentation.py b/src/CreeDictionary/API/search/presentation.py index ead4d238b..55d576022 100644 --- a/src/CreeDictionary/API/search/presentation.py +++ b/src/CreeDictionary/API/search/presentation.py @@ -56,9 +56,9 @@ class _InitialChangeResult(AbstractResult): @dataclass class _LexicalEntry: entry: List[_ReduplicationResult | SerializedWordform | _InitialChangeResult] - text: str + text: Optional[str] url: str - id: str + id: str | int | None type: LexicalEntryType original_tag: FSTTag @@ -173,9 +173,10 @@ def __init__( show_emoji=self._show_emoji, ) - self.preverbs = [ - lexical_entry["entry"] + self.preverbs: List[SerializedWordform] = [ + cast(SerializedWordform, entry) for lexical_entry in self.lexical_info + for entry in lexical_entry["entry"] if lexical_entry["type"] == "Preverb" ] self.reduplication = [ @@ -453,22 +454,20 @@ def get_lexical_info( animate_emoji: str, show_emoji: str, dict_source: list, -) -> List: +) -> List[dict]: if not result_analysis: return [] result_analysis_tags = result_analysis.prefix_tags first_letters = extract_first_letters(result_analysis) - lexical_info: List = [] + lexical_info: List[_LexicalEntry] = [] for i, tag in enumerate(result_analysis_tags): preverb_result: Optional[Preverb] = None reduplication_string: Optional[str] = None _type: Optional[LexicalEntryType] = None - entry: Optional[ - _ReduplicationResult | SerializedWordform | _InitialChangeResult - ] = None + entry = None if tag in ["RdplW+", "RdplS+"]: reduplication_string = generate_reduplication_string( @@ -501,16 +500,16 @@ def get_lexical_info( entries.append(entry) url = "search?q=" + preverb_text _type = "Preverb" - id = entries[0]["id"] + id: Optional[int] = entries[0]["id"] result = _LexicalEntry( - entry=entries, + entry=cast(Any, entries), text=preverb_text, url=url, id=id, type=_type, original_tag=tag, ) - lexical_info.append(serialize_lexical_entry(result)) + lexical_info.append(result) else: # Can't find a match for the preverb in the database. # This happens when searching against the test database for @@ -548,8 +547,8 @@ def get_lexical_info( type=_type, original_tag=tag, ) - lexical_info.append(serialize_lexical_entry(result)) - return lexical_info + lexical_info.append(result) + return [serialize_lexical_entry(entry) for entry in lexical_info] def extract_first_letters(analysis: RichAnalysis) -> List[str]: diff --git a/src/CreeDictionary/search_quality/analyze_results.py b/src/CreeDictionary/search_quality/analyze_results.py index d6dc30f8f..26ea4a678 100644 --- a/src/CreeDictionary/search_quality/analyze_results.py +++ b/src/CreeDictionary/search_quality/analyze_results.py @@ -130,7 +130,7 @@ def load_results_file(results_file: PathLike) -> SampleSearchResultsJson: return search_results -def analyze(results_file, sample_definition: SampleDefinition = None): +def analyze(results_file, sample_definition: SampleDefinition = []): """ If sample_definition is None, the default will be used. diff --git a/src/CreeDictionary/tests/API_tests/model_test.py b/src/CreeDictionary/tests/API_tests/model_test.py index 56ba91f7d..feed3f0c5 100644 --- a/src/CreeDictionary/tests/API_tests/model_test.py +++ b/src/CreeDictionary/tests/API_tests/model_test.py @@ -227,7 +227,7 @@ def test_search_words_with_reduplication(): search_result = results.pop() assert len(search_result.lexical_info) == 1 - assert search_result.lexical_info[0]["entry"]["text"] == "na-" + assert search_result.lexical_info[0]["entry"][0]["text"] == "na-" assert search_result.lexical_info[0]["type"] == "Reduplication" @@ -241,7 +241,7 @@ def test_search_words_with_inital_change(): search_result = results.pop() assert len(search_result.lexical_info) == 1 - assert search_result.lexical_info[0]["entry"]["text"] == " " + assert search_result.lexical_info[0]["entry"][0]["text"] == " " assert search_result.lexical_info[0]["type"] == "Initial Change" diff --git a/src/blaeng/README.md b/src/blaeng/README.md new file mode 100644 index 000000000..6d8d6ac51 --- /dev/null +++ b/src/blaeng/README.md @@ -0,0 +1,64 @@ +# Generating a new site from scratch + +These are all the instructions I followed to create this new version. + +``` +./crkeng-manage newdictsite --port 8011 -v 2 bla eng +# Add blaeng to morphodict/src/conftest.py +# Add MORPHODICT_LANGUAGE_ENDONYM to src/blaeng/site/settings.py +# Add many other details, copying from Woods Cree settings.py, including: +# MD_SOURCE_LANGUAGE_NAME MD_SOURCE_LANGUAGE_SHORT_NAME MD_ORTHOGRAPHY MD_DICTIONARY_NAME +./blaeng-manage migrate +./blaeng-manage ensurecypressadminuser --superuser +./blaeng-manage ensuretestdb +# MISSING TRANSDUCERS + +``` + +I had to make transducers. Generating from the instructions we just collected today! + +`git clone giellalt/lang-bla` +after generating the FSTs with the default infrastructure: +``` +hfst-xfst +``` +And ran: +``` +read lexc src/fst/morphology/lexicon.lexc +define Morphology +source src/fst/bla-phonology.xfscript +define Phonology +regex ~[ $[ "+Err/Frag" ]]; +define removeFragments + +regex ~[ $[ "+Err/Orth" ]]; +define removeNonStandardForms +regex $[ "+N" | "+V" | "+Ipc" | "+Pron" ]; +define selectDictPOS +set flag-is-epsilon ON +regex [ selectDictPOS .o. removeNonStandardForms .o. removeFragments .o. Morphology .o. Phonology ]; +save stack generator-gt-dict-norm.hfst +define NormativeGenerator +regex [ [ "<" | ">" | "/" ] -> 0 ]; +define removeBoundaries +load src/fst/orthography/spellrelax.compose.hfst +define SpellRelax +regex [ selectDictPOS .o. removeFragments .o. Morphology .o. Phonology .o. removeBoundaries .o. SpellRelax ]; +# regex [ NormativeGenerator .o. removeBoundaries .o. SpellRelax ]; +invert net +save stack analyser-gt-dict-desc.hfst +define DescriptiveAnalyser +``` + + +And then we create the `hfstol` files with: + +``` +hfst-fst2fst -O -i INPUT.hfst -o OUTPUT.hfstol +``` + +After this, `./blaeng-manage ensuretestdb` works. + +``` +./blaeng-manage importjsondict src/blaeng/resources/dictionary/blaeng_test_db.importjson +``` diff --git a/src/blaeng/__init__.py b/src/blaeng/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/blaeng/app/__init__.py b/src/blaeng/app/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/blaeng/db/.keep b/src/blaeng/db/.keep new file mode 100644 index 000000000..e69de29bb diff --git a/src/blaeng/resources/fst/analyser-gt-dict-desc.hfstol b/src/blaeng/resources/fst/analyser-gt-dict-desc.hfstol index 6ffae79f9..fbff3ad7d 100644 --- a/src/blaeng/resources/fst/analyser-gt-dict-desc.hfstol +++ b/src/blaeng/resources/fst/analyser-gt-dict-desc.hfstol @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b8f05c6a923d890459206ed83da8e04ada2f188f4f8d0db1c71bf79ee097f00c -size 1509739 +oid sha256:e4b2051d66b1d53881c3d250476df856f9c5bdebf0af41d5bbe41d2d4411dd6f +size 288 diff --git a/src/blaeng/resources/fst/generator-gt-dict-norm.hfstol b/src/blaeng/resources/fst/generator-gt-dict-norm.hfstol index 393122f9d..29f39890f 100644 --- a/src/blaeng/resources/fst/generator-gt-dict-norm.hfstol +++ b/src/blaeng/resources/fst/generator-gt-dict-norm.hfstol @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:33e0fd3b96d81d3db27508cd3d59cdeee5458dd202c2c077406a17f29eeaf0ee -size 1089694 +oid sha256:64d9dfe14d6a0df638d86a1fa5ed5fda6da8613e9ed2473f1210962759da44f6 +size 288 diff --git a/src/blaeng/site/__init__.py b/src/blaeng/site/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/blaeng/site/settings.py b/src/blaeng/site/settings.py new file mode 100644 index 000000000..c980c2e66 --- /dev/null +++ b/src/blaeng/site/settings.py @@ -0,0 +1,58 @@ +""" +Django settings for blaeng. + +For more information on this file, see +https://docs.djangoproject.com/en/3.2/topics/settings/ + +For the full list of settings and their values, see +https://docs.djangoproject.com/en/3.2/ref/settings/ +""" + +from pathlib import Path + +from morphodict.site import base_dir_setup + +BASE_DIR = Path(__file__).resolve().parent.parent + +base_dir_setup.set_base_dir(BASE_DIR) + +from morphodict.site.settings import * + +# Where this application should be deployed: +PRODUCTION_HOST = "blaeng.altlab.dev" + +ALLOWED_HOSTS.append(PRODUCTION_HOST) + +DEFAULT_RUNSERVER_PORT = 8011 + +INSTALLED_APPS.insert(0, "blaeng.app") + +# Morphodict configuration + +# The ISO 639-1 code is used in the lang="" attributes in HTML. +MORPHODICT_ISO_639_1_CODE = "bla" + +MORPHODICT_SOURCE_LANGUAGE = "bla" +MORPHODICT_TARGET_LANGUAGE = "eng" + +MORPHODICT_SOURCE_LANGUAGE_NAME = "Blackfoot" +MORPHODICT_SOURCE_LANGUAGE_SHORT_NAME = "Blackfoot" + +MORPHODICT_LANGUAGE_ENDONYM = "ᓱᖽᐧᖿ" + +MORPHODICT_DICTIONARY_NAME = "sínaakia’tsis" + +RELAXED_ANALYZER_FST_FILENAME = "analyser-gt-dict-desc.hfstol" +STRICT_ANALYZER_FST_FILENAME = RELAXED_ANALYZER_FST_FILENAME +STRICT_GENERATOR_FST_FILENAME = "generator-gt-dict-norm.hfstol" + +MORPHODICT_ORTHOGRAPHY = { + "default": "Latn", + "available": { + "Latn": {"name": "Latin"}, + "Cans": { + "name": "Syllabics", + "converter": "CreeDictionary.CreeDictionary.orthography.to_syllabics", + }, + }, +} diff --git a/src/conftest.py b/src/conftest.py index 0a8e9069e..fc325ae72 100644 --- a/src/conftest.py +++ b/src/conftest.py @@ -9,4 +9,5 @@ "hdneng/site/settings.py", "srseng/site/settings.py", "srseng/site/settings_mobile.py", + "blaeng/site/settings.py", ]