Skip to content

Commit

Permalink
Merge
Browse files Browse the repository at this point in the history
  • Loading branch information
aarppe committed May 23, 2024
2 parents aaef5fd + 7dbb4c2 commit db00959
Show file tree
Hide file tree
Showing 17 changed files with 177 additions and 41 deletions.
30 changes: 10 additions & 20 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Procfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ cwdeng: ./cwdeng-manage runserver
srseng: ./srseng-manage runserver
hdneng: ./hdneng-manage runserver
lacombe: ./crkLacombeeng-manage runserver
blaeng: ./blaeng-manage runserver

19 changes: 19 additions & 0 deletions blaeng-manage
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env python
"""
Entry point for running Django management commands for the blaeng site.
"""

import os
import sys
from pathlib import Path

# Python initializes sys.path[0] to the directory that contains this script.
# Replace it with that directory's "src" subdirectory so that imports such as
# "blaeng.site.settings" are resolved from there.
sys.path[0] = os.fspath(Path(sys.path[0]).joinpath("src"))


def main():
    """Hand the command line over to Django's management machinery.

    DJANGO_SETTINGS_MODULE is only given a default: a value already present
    in the environment is left untouched.
    """
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "blaeng.site.settings")

    # Imported here, after sys.path and the settings module have been set up.
    from django.core.management import execute_from_command_line

    execute_from_command_line(sys.argv)


if __name__ == "__main__":
    main()
2 changes: 2 additions & 0 deletions docs/developers-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ Then you can access the dictionary applications at various port numbers:
- arpeng: <http://127.0.0.1:8007/>
- cwdeng: <http://127.0.0.1:8005/>
- srseng: <http://127.0.0.1:8009/>
- blaeng: <http://127.0.0.1:8011/>

Because [cookies are not port-specific for historical insecurity
reasons](https://stackoverflow.com/questions/1612177/are-http-cookies-port-specific),
Expand All @@ -232,6 +233,7 @@ adding the following to `/etc/hosts`:
127.0.0.1 cwdeng-local
127.0.0.1 crkeng-local
127.0.0.1 srseng-local
127.0.0.1 blaeng-local

Then you can access the sites with cookie isolation at
<http://crkeng-local:8000/>, <http://cwdeng-local:8005/>,
Expand Down
1 change: 1 addition & 0 deletions scripts/dev-bootstrap
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ for LANG_PAIR in \
crkeng \
cwdeng \
srseng \
blaeng \
; do
for USE_TEST_DB in true false; do
export USE_TEST_DB
Expand Down
27 changes: 13 additions & 14 deletions src/CreeDictionary/API/search/presentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ class _InitialChangeResult(AbstractResult):
@dataclass
class _LexicalEntry:
entry: List[_ReduplicationResult | SerializedWordform | _InitialChangeResult]
text: str
text: Optional[str]
url: str
id: str
id: str | int | None
type: LexicalEntryType
original_tag: FSTTag

Expand Down Expand Up @@ -173,9 +173,10 @@ def __init__(
show_emoji=self._show_emoji,
)

self.preverbs = [
lexical_entry["entry"]
self.preverbs: List[SerializedWordform] = [
cast(SerializedWordform, entry)
for lexical_entry in self.lexical_info
for entry in lexical_entry["entry"]
if lexical_entry["type"] == "Preverb"
]
self.reduplication = [
Expand Down Expand Up @@ -453,22 +454,20 @@ def get_lexical_info(
animate_emoji: str,
show_emoji: str,
dict_source: list,
) -> List:
) -> List[dict]:
if not result_analysis:
return []

result_analysis_tags = result_analysis.prefix_tags
first_letters = extract_first_letters(result_analysis)

lexical_info: List = []
lexical_info: List[_LexicalEntry] = []

for i, tag in enumerate(result_analysis_tags):
preverb_result: Optional[Preverb] = None
reduplication_string: Optional[str] = None
_type: Optional[LexicalEntryType] = None
entry: Optional[
_ReduplicationResult | SerializedWordform | _InitialChangeResult
] = None
entry = None

if tag in ["RdplW+", "RdplS+"]:
reduplication_string = generate_reduplication_string(
Expand Down Expand Up @@ -501,16 +500,16 @@ def get_lexical_info(
entries.append(entry)
url = "search?q=" + preverb_text
_type = "Preverb"
id = entries[0]["id"]
id: Optional[int] = entries[0]["id"]
result = _LexicalEntry(
entry=entries,
entry=cast(Any, entries),
text=preverb_text,
url=url,
id=id,
type=_type,
original_tag=tag,
)
lexical_info.append(serialize_lexical_entry(result))
lexical_info.append(result)
else:
# Can't find a match for the preverb in the database.
# This happens when searching against the test database for
Expand Down Expand Up @@ -548,8 +547,8 @@ def get_lexical_info(
type=_type,
original_tag=tag,
)
lexical_info.append(serialize_lexical_entry(result))
return lexical_info
lexical_info.append(result)
return [serialize_lexical_entry(entry) for entry in lexical_info]


def extract_first_letters(analysis: RichAnalysis) -> List[str]:
Expand Down
2 changes: 1 addition & 1 deletion src/CreeDictionary/search_quality/analyze_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def load_results_file(results_file: PathLike) -> SampleSearchResultsJson:
return search_results


def analyze(results_file, sample_definition: SampleDefinition = None):
def analyze(results_file, sample_definition: SampleDefinition = []):
"""
If sample_definition is None, the default will be used.
Expand Down
4 changes: 2 additions & 2 deletions src/CreeDictionary/tests/API_tests/model_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def test_search_words_with_reduplication():
search_result = results.pop()

assert len(search_result.lexical_info) == 1
assert search_result.lexical_info[0]["entry"]["text"] == "na-"
assert search_result.lexical_info[0]["entry"][0]["text"] == "na-"
assert search_result.lexical_info[0]["type"] == "Reduplication"


Expand All @@ -241,7 +241,7 @@ def test_search_words_with_inital_change():
search_result = results.pop()

assert len(search_result.lexical_info) == 1
assert search_result.lexical_info[0]["entry"]["text"] == " "
assert search_result.lexical_info[0]["entry"][0]["text"] == " "
assert search_result.lexical_info[0]["type"] == "Initial Change"


Expand Down
64 changes: 64 additions & 0 deletions src/blaeng/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Generating a new site from scratch

These are all the instructions I followed to create this new version.

```
./crkeng-manage newdictsite --port 8011 -v 2 bla eng
# Add blaeng to morphodict/src/conftest.py
# Add MORPHODICT_LANGUAGE_ENDONYM to src/blaeng/site/settings.py
# Add many other details, copying from Woods Cree settings.py, including:
# MD_SOURCE_LANGUAGE_NAME MD_SOURCE_LANGUAGE_SHORT_NAME MD_ORTHOGRAPHY MD_DICTIONARY_NAME
./blaeng-manage migrate
./blaeng-manage ensurecypressadminuser --superuser
./blaeng-manage ensuretestdb
# MISSING TRANSDUCERS
```

The transducers were missing, so I had to build them, following the instructions we collected today.

Clone the language repository with `git clone giellalt/lang-bla`.
After generating the FSTs with the default infrastructure, start:
```
hfst-xfst
```
Then, inside the `hfst-xfst` session, run:
```
read lexc src/fst/morphology/lexicon.lexc
define Morphology
source src/fst/bla-phonology.xfscript
define Phonology
regex ~[ $[ "+Err/Frag" ]];
define removeFragments
regex ~[ $[ "+Err/Orth" ]];
define removeNonStandardForms
regex $[ "+N" | "+V" | "+Ipc" | "+Pron" ];
define selectDictPOS
set flag-is-epsilon ON
regex [ selectDictPOS .o. removeNonStandardForms .o. removeFragments .o. Morphology .o. Phonology ];
save stack generator-gt-dict-norm.hfst
define NormativeGenerator
regex [ [ "<" | ">" | "/" ] -> 0 ];
define removeBoundaries
load src/fst/orthography/spellrelax.compose.hfst
define SpellRelax
regex [ selectDictPOS .o. removeFragments .o. Morphology .o. Phonology .o. removeBoundaries .o. SpellRelax ];
# regex [ NormativeGenerator .o. removeBoundaries .o. SpellRelax ];
invert net
save stack analyser-gt-dict-desc.hfst
define DescriptiveAnalyser
```


And then we create the `hfstol` files with:

```
hfst-fst2fst -O -i INPUT.hfst -o OUTPUT.hfstol
```

After this, `./blaeng-manage ensuretestdb` works.

```
./blaeng-manage importjsondict src/blaeng/resources/dictionary/blaeng_test_db.importjson
```
Empty file added src/blaeng/__init__.py
Empty file.
Empty file added src/blaeng/app/__init__.py
Empty file.
Empty file added src/blaeng/db/.keep
Empty file.
4 changes: 2 additions & 2 deletions src/blaeng/resources/fst/analyser-gt-dict-desc.hfstol
Git LFS file not shown
4 changes: 2 additions & 2 deletions src/blaeng/resources/fst/generator-gt-dict-norm.hfstol
Git LFS file not shown
Empty file added src/blaeng/site/__init__.py
Empty file.
58 changes: 58 additions & 0 deletions src/blaeng/site/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""
Django settings for blaeng, the Blackfoot ("bla") to English ("eng") site.

This module registers the base directory of the blaeng package, star-imports
the shared defaults from morphodict.site.settings, and then overrides or
extends the values that are specific to this site.

For more information on this file, see
https://docs.djangoproject.com/en/3.2/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/3.2/ref/settings/
"""

from pathlib import Path

from morphodict.site import base_dir_setup

# Two levels above this file, i.e. the directory that contains site/.
BASE_DIR = Path(__file__).resolve().parent.parent

# Registered before the star import below.
# NOTE(review): presumably the shared settings read the base dir at import
# time -- confirm before reordering these statements.
base_dir_setup.set_base_dir(BASE_DIR)

# Star import of the shared defaults. ALLOWED_HOSTS and INSTALLED_APPS are
# used below without being defined in this file, so they come from here.
from morphodict.site.settings import *

# Where this application should be deployed:
PRODUCTION_HOST = "blaeng.altlab.dev"

ALLOWED_HOSTS.append(PRODUCTION_HOST)

# Local development port; the developers' guide lists blaeng at 8011.
DEFAULT_RUNSERVER_PORT = 8011

# Put the site-specific app at the front of the installed apps list.
INSTALLED_APPS.insert(0, "blaeng.app")

# Morphodict configuration

# This code is used in the lang="" attributes in HTML.
# NOTE(review): despite the setting's name, "bla" is a three-letter
# ISO 639-2/639-3 code; ISO 639-1 codes are two letters long.
MORPHODICT_ISO_639_1_CODE = "bla"

MORPHODICT_SOURCE_LANGUAGE = "bla"
MORPHODICT_TARGET_LANGUAGE = "eng"

# Both the full and the short display name are the same string.
MORPHODICT_SOURCE_LANGUAGE_NAME = "Blackfoot"
MORPHODICT_SOURCE_LANGUAGE_SHORT_NAME = "Blackfoot"

# The language's name for itself.
MORPHODICT_LANGUAGE_ENDONYM = "ᓱᖽᐧᖿ"

MORPHODICT_DICTIONARY_NAME = "sínaakia’tsis"

# No separate strict analyser file is configured: the strict analyzer reuses
# the relaxed (descriptive) one.
RELAXED_ANALYZER_FST_FILENAME = "analyser-gt-dict-desc.hfstol"
STRICT_ANALYZER_FST_FILENAME = RELAXED_ANALYZER_FST_FILENAME
STRICT_GENERATOR_FST_FILENAME = "generator-gt-dict-norm.hfstol"

# Orthographies offered by the site; "Latn" is the default.
# NOTE(review): the "Cans" converter below points into the CreeDictionary
# package -- confirm that its to_syllabics is appropriate for Blackfoot text.
MORPHODICT_ORTHOGRAPHY = {
"default": "Latn",
"available": {
"Latn": {"name": "Latin"},
"Cans": {
"name": "Syllabics",
"converter": "CreeDictionary.CreeDictionary.orthography.to_syllabics",
},
},
}
1 change: 1 addition & 0 deletions src/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@
"hdneng/site/settings.py",
"srseng/site/settings.py",
"srseng/site/settings_mobile.py",
"blaeng/site/settings.py",
]

0 comments on commit db00959

Please sign in to comment.