Merge pull request #37 from fearful-symmetry/rhai_support

Add rhai support
fearful-symmetry · Feb 19, 2024 · 2325fcd · 2325fcd
2 parents dec2f95 + f7a1f0e
commit 2325fcd
Show file tree

Hide file tree

Showing 21 changed files with 421 additions and 56 deletions.
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
@@ -51,5 +51,8 @@ jobs:
     - name: Post-ingest render 
       run: cargo run -- render -d examples/ingest_from_json/empty_language line
 
+    - name: rhai
+      run: cargo run -- render -d examples/rhai line
+
     - name: Globals
       run: cargo run -- render -d examples/global_transforms line
diff --git a/examples/rhai/etymology/ety.json b/examples/rhai/etymology/ety.json
@@ -0,0 +1,14 @@
+{
+    "transforms": {
+        "from-root" : {
+            "transforms": [
+                {"rhai_script": {"file": "examples/rhai/rhai/string_transform.rhai"}}
+            ]
+        },
+        "from-middle": {
+            "transforms": [
+                {"rhai_script": {"file": "examples/rhai/rhai/array_transform.rhai"}}
+            ]
+        }
+    }
+}
diff --git a/examples/rhai/readme.md b/examples/rhai/readme.md
@@ -0,0 +1,34 @@
+# Using the Rhai scripting language to transform words
+
+Kirum supports [Rhai](https://github.com/rhaiscript/rhai) for transforming words in a language tree using a simple scripting language. This allows for more complex transforms based on string manipulation and conditional logic:
+
+```rhai
+// if the etymon's language is "mylang", remove all instances of the letter "t"
+// and add the suffix "ah" if the word also starts with "el".
+if language == "mylang" {
+    updated.remove("t");
+    if updated.starts_with("el"){
+        updated = updated + "ah"
+    }
+}
+```
+
+Rhai documentation can be found [here](https://rhai.rs/book/ref/index.html).
+
+To use a rhai script, specify it as a transform:
+```json
+        "from-root" : {
+            "transforms": [
+                {"rhai_script": {"file": "rhai/string_transform.rhai"}}
+            ]
+        },
+```
+
+As demonstrated in [string_transform.rhai](rhai/string_transform.rhai), Kirum exports a number of variables into the Rhai script
+that can be used to transform a word selectively based on the word's associated metadata.
+
+To render the test, run:
+
+```bash
+kirum render -d ./examples/rhai line
+```
diff --git a/examples/rhai/rhai/array_transform.rhai b/examples/rhai/rhai/array_transform.rhai
@@ -0,0 +1,8 @@
+// in addition to lemma_string
+// the word is also exported as an array
+// This is useful in cases where the characters of a language do not align with Unicode's idea of a word.
+let updated = lemma_array;
+
+updated.push("u");
+
+updated
diff --git a/examples/rhai/rhai/string_transform.rhai b/examples/rhai/rhai/string_transform.rhai
@@ -0,0 +1,33 @@
+// kirum exports the word you want to transform as `lemma_string`
+
+let updated = lemma_string;
+updated.replace("k", "c");
+
+// Note that the metadata exported into the Rhai script is the metadata of
+// the word you're *transforming from*, not the word you're *transforming into*.
+
+// Kirum also exports the part of speech as `pos`
+// see libkirum/src/word.rs for possible values.
+
+if pos == "none" {
+    updated = "ō" + updated;
+}
+
+// `language` is also available
+
+if language == "Greek" {
+    updated.remove("t");
+}
+
+// The tags associated with the word are also exported
+if tags.contains("example") {
+    updated = updated + "a";
+}
+
+// the historical_metadata field is also exported, as `metadata`
+if metadata.contains("word") {
+    updated = updated + "l";
+}
+
+// return the value
+updated
diff --git a/examples/rhai/tree/example.json b/examples/rhai/tree/example.json
@@ -0,0 +1,18 @@
+{
+    "words": {
+        "late": {
+            "language": "Exemplum", "definition": "the final word",
+            "etymology": {"etymons": [{"etymon": "middle", "transforms": ["from-middle"]}]
+            }
+        },
+        "middle": {
+            "language": "Exemplum", "definition": "The middle word",
+            "etymology": {"etymons": [{"etymon": "root-word", "transforms":["from-root"]}]}
+        },
+        "root-word": {
+            "word": "kratia", "language": "Greek", "definition": "power, rule",
+            "tags": ["example"],
+            "historical_metadata": {"word": "example"}
+        }
+    }
+}
diff --git a/kirum/src/files.rs b/kirum/src/files.rs
@@ -240,7 +240,7 @@ pub fn read_and_compute(directory: Option<String>) -> Result<LanguageTree>{
     info!("Reading in existing language files...");
     let mut lang_tree = read_from_files(new_project)?;
     info!("rendering tree...");
-    lang_tree.compute_lexicon();
+    lang_tree.compute_lexicon()?;
     Ok(lang_tree)
 }
 

diff --git a/kirum/src/generate.rs b/kirum/src/generate.rs
@@ -27,7 +27,7 @@ pub fn daughter(daughter_ety: String,
         debug!("Creating daughter language '{}' from '{}'", lang_name, ancestor);
         computed.generate_daughter_language(lang_name.clone(), 
         processed_transforms, |l| l.language == ancestor, 
-        |l| Lexis { id: format!("daughter-from-{}", l.id), tags: [l.tags.clone(), ["autogenerated".to_string()].to_vec()].concat(), ..l.clone()});
+        |l| Lexis { id: format!("daughter-from-{}", l.id), tags: [l.tags.clone(), ["autogenerated".to_string()].to_vec()].concat(), ..l.clone()})?;
 
         let rendered_dict = computed.to_vec_etymons(|word|word.language == lang_name);
 

diff --git a/kirum/src/stat.rs b/kirum/src/stat.rs
@@ -8,6 +8,7 @@ struct Stats {
     nouns: i64,
     verbs: i64,
     adjectives: i64,
+    none: i64,
     total: usize
 }
 
@@ -21,7 +22,8 @@ pub fn gen_stats(tree: LanguageTree) -> String {
             match pos {
                 libkirum::word::PartOfSpeech::Adjective => stats.adjectives+=1,
                 libkirum::word::PartOfSpeech::Verb => stats.verbs+=1,
-                libkirum::word::PartOfSpeech::Noun => stats.nouns+=1
+                libkirum::word::PartOfSpeech::Noun => stats.nouns+=1,
+                libkirum::word::PartOfSpeech::None => stats.none+=1
             }
         }
         let lang_name = match lex.language.as_str() {

diff --git a/libkirum/Cargo.toml b/libkirum/Cargo.toml
@@ -15,4 +15,5 @@ serde_with = {version = "3.0.0", features= ["json"]}
 log = "0.4.17"
 unicode-segmentation = "1.10.1"
 rand = "0.8.5"
-env_logger = "0.9.0"
+env_logger = "0.9.0"
+rhai = "1.17.1"
diff --git a/libkirum/src/errors.rs b/libkirum/src/errors.rs
@@ -1,3 +1,6 @@
+use rhai::EvalAltResult;
+
+
 
 #[derive(thiserror::Error, Debug)]
 pub enum LangError {
@@ -21,4 +24,18 @@ pub struct PhoneticParsingError {
 #[error("invalid part of speech value {found}")]
 pub struct POSFromError {
     pub found: String
+}
+
+#[derive(thiserror::Error, Debug)]
+#[error("could not parse dynamic type {dyn_type} into Lemma. Return must be an array of strings or string")]
+pub struct LemmaFromError {
+    pub dyn_type: String,
+}
+
+#[derive(thiserror::Error, Debug)]
+pub enum TransformError {
+    #[error("error evaluating Rhai script")]
+    EvalError(#[from] Box<EvalAltResult>),
+    #[error("could not parse return value from script")]
+    ScriptReturnValueError(#[from] LemmaFromError)
 }